Skip to content

Commit

Permalink
[DT] Retire UpperBoundTileSizeOp op and relevant passes. (iree-org#18045)
Browse files Browse the repository at this point in the history

This is a follow-up to
iree-org@9aaae34;
the op is no longer needed.

Signed-off-by: hanhanW <[email protected]>
  • Loading branch information
hanhanW authored Jul 30, 2024
1 parent 18c183f commit 4c0a18a
Show file tree
Hide file tree
Showing 10 changed files with 1 addition and 250 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -443,55 +443,6 @@ getMaterializeEncodingFn(IREE::HAL::ExecutableTargetAttr targetAttr) {
};
}

// Like getMaterializeEncodingFn, but iterating over an array of targets and
// returning the max of all tile sizes from each target, checking that other
// materialization info (permutations) agree.
//
// This is useful to compute padding amounts, in the materialization of
// UpperBoundTileSizeOp, in top-level functions that are not part of one HAL
// executable variant. There, the padding amounts only control the size of
// allocated buffers, so it's OK to over-estimate (only wasting some memory)
// but not under-estimate (would cause buffer overruns) padding amounts.
//
// Merge rules applied by the returned function:
//   - Targets for which materializeEncodingForTarget fails are skipped.
//   - If no target yields info, the result is failure.
//   - If two targets disagree on innerDimsPos, outerDimsPerm or the number of
//     inner tiles, the result is failure.
//   - Per inner tile, a dynamic size reported by any target makes the merged
//     size dynamic; otherwise the merged size is the maximum static size.
//
// NOTE: the returned function captures `targetAttrs` by value, and ArrayRef
// is a non-owning view. The caller must keep the underlying storage alive for
// as long as the returned function may be invoked.
static MaterializeEncodingFn getUpperBoundMaterializeEncodingFn(
    ArrayRef<IREE::HAL::ExecutableTargetAttr> targetAttrs) {
  return [targetAttrs](RankedTensorType tensorType)
             -> FailureOr<MaterializeEncodingInfo> {
    FailureOr<MaterializeEncodingInfo> result; // Defaults to failure.
    for (auto targetAttr : targetAttrs) {
      FailureOr<MaterializeEncodingInfo> info =
          materializeEncodingForTarget(tensorType, targetAttr);
      if (failed(info)) {
        // No info at this iteration. Ignore and continue.
        continue;
      }
      if (failed(result)) {
        // No preexisting result. Use this iteration's info and continue.
        result = info;
        continue;
      }
      // Merge this iteration's info into preexisting result info.
      // Check that permutations match, then record the max of tile sizes.
      if (info->innerDimsPos != result->innerDimsPos ||
          info->outerDimsPerm != result->outerDimsPerm) {
        return failure();
      }
      if (info->innerTileSizes.size() != result->innerTileSizes.size()) {
        return failure();
      }
      for (size_t i = 0, e = info->innerTileSizes.size(); i < e; ++i) {
        int64_t &merged = result->innerTileSizes[i];
        const int64_t incoming = info->innerTileSizes[i];
        // The dynamic marker is a sentinel, not a size, so it must never be
        // compared numerically through std::max. Once either side is dynamic
        // the merged size stays dynamic regardless of the order in which the
        // targets are visited; otherwise a later static size could replace an
        // earlier dynamic one and under-estimate the padding.
        if (ShapedType::isDynamic(merged) || ShapedType::isDynamic(incoming)) {
          merged = ShapedType::kDynamic;
        } else {
          merged = std::max(merged, incoming);
        }
      }
    }
    return result;
  };
}

static FailureOr<MaterializeEncodingValueInfo>
chooseDynamicEncodingInfoVMVXMicrokernels(RankedTensorType tensorType,
OpBuilder &builder, Location loc) {
Expand Down Expand Up @@ -665,66 +616,4 @@ std::unique_ptr<Pass> createCPUMaterializeDeviceEncodingPass() {
return std::make_unique<CPUMaterializeDeviceEncodingPass>();
}

// NOTE: this runs on host modules.
//
// For every function in the module, resolves the executable targets the
// function may run on and rewrites its upper-bound tile size queries using
// patterns specialized for that set of targets.
struct CPUMaterializeUpperBoundTileSizePass
    : public CPUMaterializeUpperBoundTileSizeBase<
          CPUMaterializeUpperBoundTileSizePass> {
  CPUMaterializeUpperBoundTileSizePass() = default;

  void getDependentDialects(DialectRegistry &registry) const override {
    registry.insert<arith::ArithDialect>();
  }

  void runOnOperation() override {
    auto hostModule = getOperation();

    // Both analyses have to succeed before any function can be processed.
    IREE::Stream::AffinityAnalysis affinities(hostModule);
    if (failed(affinities.run())) {
      signalPassFailure();
      return;
    }
    IREE::HAL::DeviceAnalysis devices(hostModule);
    if (failed(devices.run())) {
      signalPassFailure();
      return;
    }

    for (auto funcOp : hostModule.getOps<FunctionOpInterface>()) {
      // Collect the executable targets required by this function. Ops nested
      // inside the function may need additional targets; this pass is a hack
      // and does not account for them.
      auto targets = getFuncExecutableTargetAttrs(funcOp, affinities, devices);
      if (!targets) {
        funcOp.emitOpError()
            << "could not determine executable targets for the function";
        signalPassFailure();
        return;
      }
      if (targets->empty()) {
        // Probably no tensors.
        continue;
      }

      // Build the rewrite patterns from the upper bound computed across all
      // targets used by this function.
      MaterializeEncodingFn upperBoundFn =
          getUpperBoundMaterializeEncodingFn(targets->getArrayRef());
      if (!upperBoundFn) {
        signalPassFailure();
        return;
      }
      RewritePatternSet patterns(&getContext());
      populateMaterializeUpperBoundTileSizePatterns(patterns, upperBoundFn);

      // Apply the patterns to this function only.
      if (failed(applyPatternsAndFoldGreedily(funcOp, std::move(patterns)))) {
        funcOp.emitOpError(
            "encoding padding sizes materialization pattern failed");
        signalPassFailure();
        return;
      }
    }
  }
};

// Factory returning a new CPUMaterializeUpperBoundTileSizePass instance.
std::unique_ptr<Pass> createCPUMaterializeUpperBoundTileSizePass() {
  std::unique_ptr<Pass> pass =
      std::make_unique<CPUMaterializeUpperBoundTileSizePass>();
  return pass;
}

} // namespace mlir::iree_compiler
17 changes: 0 additions & 17 deletions compiler/src/iree/compiler/Codegen/Common/CPU/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,23 +26,6 @@ namespace mlir::iree_compiler {
std::unique_ptr<Pass> createCPUMaterializeHostEncodingPass();
std::unique_ptr<Pass> createCPUMaterializeDeviceEncodingPass();

/// Like createLLVMCPUMaterializeEncodingPass, but specifically for
/// encoding.upper_bound_tile_size, converting it to constants.
///
/// Unlike createLLVMCPUMaterializeEncodingPass, this does not require the
/// op to have a specific HAL target attribute. Instead, this will iterate over
/// all HAL target attributes and use the maximum of all padding sizes from
/// each target. This is needed because in top-level functions outside of HAL
/// executables, there are upper_bound_tile_size ops (created by SetEncoding,
/// and computing buffer allocation sizes) and there isn't one specific HAL
/// target.
///
/// In the VMVX case where padding sizes are not compile-time constants, this
/// converts upper_bound_tile_size to some specific constant size (currently 16)
/// that is the largest tile size that we can use in VMVX, and can be adjusted
/// as needed.
std::unique_ptr<Pass> createCPUMaterializeUpperBoundTileSizePass();

/// Adds CPU bufferization passes to the pipeline.
void addCPUBufferizePasses(OpPassManager &funcPassManager);

Expand Down
6 changes: 0 additions & 6 deletions compiler/src/iree/compiler/Codegen/Common/CPU/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,6 @@ def CPUMaterializeDeviceEncoding :
let constructor = "mlir::iree_compiler::createCPUMaterializeDeviceEncodingPass()";
}

// Pass definition for materializing upper_bound_tile_size ops to constants.
// It is anchored on mlir::ModuleOp, registered under the argument
// `iree-codegen-cpu-materialize-upper-bound-tile-size`, and constructed via
// createCPUMaterializeUpperBoundTileSizePass().
def CPUMaterializeUpperBoundTileSize :
Pass<"iree-codegen-cpu-materialize-upper-bound-tile-size", "mlir::ModuleOp"> {
let summary = "Materialize upper_bound_tile_size to constants.";
let constructor = "mlir::iree_compiler::createCPUMaterializeUpperBoundTileSizePass()";
}

def CPULowerToUKernels :
Pass<"iree-codegen-cpu-lower-to-ukernels", ""> {
let summary =
Expand Down
3 changes: 0 additions & 3 deletions compiler/src/iree/compiler/Codegen/Common/EncodingUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,6 @@ void populateMaterializeEncodingIntoPackUnPackPatterns(
MaterializeEncodingTypeConverter &typeConverter,
MaterializeEncodingValueFn materializeEncodingValueFn);

// Populates `patterns` with the pattern that rewrites
// IREE::Encoding::UpperBoundTileSizeOp into constants, using
// `materializeEncodingFn` to obtain the tile sizes for the op's tensor type.
void populateMaterializeUpperBoundTileSizePatterns(
RewritePatternSet &patterns, MaterializeEncodingFn materializeEncodingFn);

// Returns true if `encoding` represents a narrow-N matmul RESULT, e.g. the
// result of a matvec.
bool isNarrowNResult(IREE::Encoding::EncodingAttr encoding);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,18 +54,6 @@ struct MaterializeEncodingIntoNopPass
return signalPassFailure();
}

{
RewritePatternSet patterns(context);
populateMaterializeUpperBoundTileSizePatterns(patterns,
materializeEncodingFn);
if (failed(
applyPatternsAndFoldGreedily(operation, std::move(patterns)))) {
operation.emitOpError(
"encoding padding sizes materialization pattern failed");
return signalPassFailure();
}
}

// Add patterns to resolve dims ops and cleanups.
{
RewritePatternSet patterns(context);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -289,38 +289,6 @@ static FailureOr<tensor::UnPackOp> lowerUnsetEncodingToUnpackOp(
*innerTileSizesOfr, materializeEncodingInfo->outerDimsPerm);
}

/// Builds one index constant per dimension of the op's tensor type, holding
/// the tile size reported by `materializeEncodingFn` for that dimension.
///
/// Dimensions listed in the encoding's innerDimsPos receive the matching inner
/// tile size, with dynamic sizes replaced by the fixed value 16. All remaining
/// dimensions receive 1 so that no padding is applied to them. Returns a match
/// failure when `materializeEncodingFn` cannot handle the tensor type.
static FailureOr<SmallVector<Value>> lowerUpperBoundTileSizeOpToConstants(
    RewriterBase &rewriter,
    IREE::Encoding::UpperBoundTileSizeOp upperBoundTileSizeOp,
    MaterializeEncodingFn materializeEncodingFn) {
  RankedTensorType tensorType = upperBoundTileSizeOp.getTensorType();
  FailureOr<MaterializeEncodingInfo> encodingInfo =
      materializeEncodingFn(tensorType);
  if (failed(encodingInfo)) {
    return rewriter.notifyMatchFailure(upperBoundTileSizeOp,
                                       "unhandled source encoding");
  }

  Location loc = upperBoundTileSizeOp.getLoc();
  ArrayRef<int64_t> tileSizes = encodingInfo->innerTileSizes;
  ArrayRef<int64_t> tiledDims = encodingInfo->innerDimsPos;

  // One slot per tensor dimension; slots stay null until a constant is set.
  SmallVector<Value> tileSizeValues(tensorType.getRank());
  for (size_t idx = 0, numTiles = tileSizes.size(); idx != numTiles; ++idx) {
    const int64_t staticSize =
        ShapedType::isDynamic(tileSizes[idx]) ? 16 : tileSizes[idx];
    tileSizeValues[tiledDims[idx]] =
        rewriter.create<arith::ConstantIndexOp>(loc, staticSize);
  }

  // For the dims that have no inner tiles, use 1 as tile size to avoid padding.
  for (Value &tileSizeValue : tileSizeValues) {
    if (tileSizeValue) {
      continue;
    }
    tileSizeValue = rewriter.create<arith::ConstantIndexOp>(loc, 1);
  }
  return tileSizeValues;
}

static FailureOr<Operation *>
lowerContractionOpWithEncoding(RewriterBase &rewriter,
linalg::LinalgOp linalgOp, ValueRange operands,
Expand Down Expand Up @@ -788,36 +756,6 @@ struct UnsetEncodingOpToUnPackOpConversion
}
};

/// Rewrites an `upper_bound_tile_size` op into index constants.
///
/// When `materializeEncodingFn` provides tile sizes, the op is replaced by the
/// constants computed from them. When it fails, every result is replaced by
/// the constant 1, i.e. the shape is left unchanged. The pattern reports
/// success in both cases.
struct UpperBoundTileSizeToConstantOpConversion
    : public OpRewritePattern<IREE::Encoding::UpperBoundTileSizeOp> {
  UpperBoundTileSizeToConstantOpConversion(
      MLIRContext *context, MaterializeEncodingFn materializeEncodingFn)
      : OpRewritePattern<IREE::Encoding::UpperBoundTileSizeOp>(context),
        materializeEncodingFn(materializeEncodingFn) {}

  LogicalResult
  matchAndRewrite(IREE::Encoding::UpperBoundTileSizeOp upperBoundTileSizeOp,
                  PatternRewriter &rewriter) const override {
    FailureOr<SmallVector<Value>> tileSizes =
        lowerUpperBoundTileSizeOpToConstants(rewriter, upperBoundTileSizeOp,
                                             materializeEncodingFn);
    if (!failed(tileSizes)) {
      rewriter.replaceOp(upperBoundTileSizeOp, *tileSizes);
      return success();
    }

    // No tile sizes available: a single constant 1 is shared by all results.
    Value unitTile = rewriter.create<arith::ConstantIndexOp>(
        upperBoundTileSizeOp.getLoc(), 1);
    SmallVector<Value> unitTiles(upperBoundTileSizeOp.getNumResults(),
                                 unitTile);
    rewriter.replaceOp(upperBoundTileSizeOp, unitTiles);
    return success();
  }

  /// Callback providing the materialization info for a tensor type.
  MaterializeEncodingFn materializeEncodingFn;
};

/// Generic pattern to convert operation that is in Destination Passing Style.
template <typename OpTy>
struct MaterializeDPSOperation : public OpMaterializeEncodingPattern<OpTy> {
Expand Down Expand Up @@ -959,10 +897,4 @@ void populateMaterializeEncodingIntoPackUnPackPatterns(
context, typeConverter, materializeEncodingValueFn);
}

// Adds the upper_bound_tile_size -> constant conversion pattern to `patterns`,
// parameterized by `materializeEncodingFn`.
void populateMaterializeUpperBoundTileSizePatterns(
    RewritePatternSet &patterns, MaterializeEncodingFn materializeEncodingFn) {
  MLIRContext *context = patterns.getContext();
  patterns.insert<UpperBoundTileSizeToConstantOpConversion>(
      context, materializeEncodingFn);
}

} // namespace mlir::iree_compiler
23 changes: 0 additions & 23 deletions compiler/src/iree/compiler/Dialect/Encoding/IR/EncodingOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -53,29 +53,6 @@ def IREEEncoding_SetEncodingOp : IREEEncoding_PureOp<"set_encoding",[
}];
}

//===----------------------------------------------------------------------===//
// upper_bound_tile_size op.
//===----------------------------------------------------------------------===//

// The op takes no SSA operands: the queried tensor type is carried as the
// `tensorType` type attribute, and the op yields a variadic list of `index`
// results.
// NOTE(review): IREEEncoding_PureOp presumably already implies the Pure trait,
// which would make the explicit [Pure] below redundant — confirm.
def IREEEncoding_UpperBoundTileSizeOp : IREEEncoding_PureOp<"upper_bound_tile_size",
[Pure]> {
let summary = "returns an upper bound on tile sizes";
let description = [{
This returns the largest tile sizes that might result from materialization
of the given encoding. This can be used outside of target-specific code, so
there may be multiple targets, and this will return the maximum tile size
from iterating over all of them. The evaluation happens in the
MaterializeUpperBoundTileSize pass.
}];

let arguments = (ins TypeAttrOf<AnyRankedTensor>:$tensorType);
let results = (outs Variadic<Index>:$results);

let assemblyFormat = [{
attr-dict $tensorType `->` type($results)
}];
}

//===----------------------------------------------------------------------===//
// unset_encoding op.
//===----------------------------------------------------------------------===//
Expand Down
7 changes: 0 additions & 7 deletions compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

#include <memory>

#include "iree/compiler/Codegen/Common/CPU/Passes.h"
#include "iree/compiler/Dialect/HAL/IR/HALDialect.h"
#include "iree/compiler/Dialect/HAL/IR/HALOps.h"
#include "iree/compiler/Dialect/HAL/Target/Devices/LocalDevice.h"
Expand Down Expand Up @@ -313,12 +312,6 @@ void buildHALTransformPassPipeline(OpPassManager &passManager,
buildHALConfigurationPassPipeline(passManager, targetRegistry,
targetOptions, hooks);

// HACK: this should not be here and will be going away. It exists for
// lowering encoding upper_bound_tile_size ops
// (IREE::Encoding::UpperBoundTileSizeOp) that exist on the host. We should
// be using stream ops for performing such calculations that we can attach
// affinities to and understand what devices are being used.
passManager.addPass(createCPUMaterializeUpperBoundTileSizePass());

// Preprocess executables using an external tool. The tool may mutate one or
// more variants and even insert or remove variants.
for (auto command : clPreprocessExecutablesWith) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -336,8 +336,7 @@ void registerUtilExternalModels(DialectRegistry &registry) {
registry.addExtension(
+[](MLIRContext *context, IREE::Encoding::IREEEncodingDialect *dialect) {
UnhoistableOpInterfaceHelper<
IREE::Encoding::SetEncodingOp,
IREE::Encoding::UpperBoundTileSizeOp>::registerOpInterface(context);
IREE::Encoding::SetEncodingOp>::registerOpInterface(context);
});
// Register hoistable type interfaces for linalg ops.
// We have a specific allow-list for Linalg ops because we want to consider
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ class MaterializeHomogeneousEncodingsPass
}

OpPassManager passManager(moduleOp.getOperationName());
passManager.addPass(createCPUMaterializeUpperBoundTileSizePass());
passManager.addPass(createCPUMaterializeHostEncodingPass());
if (failed(runPipeline(passManager, moduleOp))) {
return signalPassFailure();
Expand Down

0 comments on commit 4c0a18a

Please sign in to comment.