Skip to content

Commit

Permalink
[Codegen] Support inferring scalable vector sizes (iree-org#17891)
Browse files Browse the repository at this point in the history
This patch extends generic vectorization to support inferring scalable
vector sizes for linalg ops (using the
ScalableValueBoundsConstraintSet).

Note: Inferring scalable sizes for tensor.pack/unpack is not supported.

---------

Signed-off-by: Benjamin Maxwell <[email protected]>
  • Loading branch information
MacDue authored Jul 19, 2024
1 parent 57361bc commit cfc79ea
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 13 deletions.
45 changes: 32 additions & 13 deletions compiler/src/iree/compiler/Codegen/Common/GenericVectorization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ namespace {
struct VectorizationTileSizes {
SmallVector<int64_t> destShape;
SmallVector<int64_t> vectorSizes;
SmallVector<bool> vectorScalableFlags;
};

/// Returns a VectorizationTileSizes which contains the inferred bounded result
Expand All @@ -41,13 +42,25 @@ static std::optional<VectorizationTileSizes> inferSizesFromIR(Value val);
/// Returns std::nullopt if vector sizes can't be inferred.
static std::optional<VectorizationTileSizes>
inferSizesFromIR(linalg::LinalgOp linalgOp, std::optional<OpResult> opResult) {
LLVM_DEBUG(VEC_DBGS() << "Inferring sizes for:\n"
<< linalgOp << " with OpResult.resultNumber="
<< opResult->getResultNumber() << "\n");
LLVM_DEBUG({
VEC_DBGS() << "Inferring sizes for:\n" << linalgOp;
if (opResult) {
VEC_DBGS() << " with OpResult.resultNumber="
<< opResult->getResultNumber();
}
VEC_DBGS() << '\n';
});

std::optional<VscaleRange> vscaleRange;
if (!opResult) {
// Note: Inferring scalable sizes is not supported if `opResult` is set
// (which is used to compute sizes for tensor.pack/unpack).
auto targetAttr = IREE::HAL::ExecutableTargetAttr::lookup(linalgOp);
vscaleRange = getDefaultVscaleRange(targetAttr);
}

VectorizationTileSizes result;
unsigned numDims = linalgOp.getNumLoops();

for (int dim = 0; dim < numDims; ++dim) {
// Map dimension `dim` to an operand dimension that we will use to
// traverse the U-D chain to get `dim` vector size information.
Expand All @@ -63,22 +76,21 @@ inferSizesFromIR(linalg::LinalgOp linalgOp, std::optional<OpResult> opResult) {
// Trivial case: `dim` size is available in the operand type.
int64_t dimSize = llvm::cast<ShapedType>(firstOperand.getType())
.getShape()[firstOperandDim];
bool dimScalable = false;
if (!ShapedType::isDynamic(dimSize)) {
result.vectorSizes.push_back(dimSize);
result.vectorScalableFlags.push_back(dimScalable);
LLVM_DEBUG(VEC_DBGS() << "Inferred iteration size '" << dimSize
<< "' for dimension '" << dim << "'\n");
continue;
}

// Use ValueBounds analysis to infer `dim` size upper bound.
FailureOr<int64_t> maybeDimBound;
FailureOr<DimBoundSize> maybeDimBound;
for (auto operandDimPair : operandDimPairs) {
Value operand = operandDimPair.first;
unsigned operandDim = operandDimPair.second;
maybeDimBound = ValueBoundsConstraintSet::computeConstantBound(
presburger::BoundType::UB, {operand, operandDim},
/*stopCondition=*/nullptr, /*closedUB=*/true);

maybeDimBound = computeDimUpperBound(operand, operandDim, vscaleRange);
if (succeeded(maybeDimBound)) {
break;
}
Expand All @@ -88,13 +100,19 @@ inferSizesFromIR(linalg::LinalgOp linalgOp, std::optional<OpResult> opResult) {
return std::nullopt;
}

dimSize = maybeDimBound.value();
dimSize = maybeDimBound->baseSize;
dimScalable = maybeDimBound->scalable;
result.vectorSizes.push_back(dimSize);
result.vectorScalableFlags.push_back(dimScalable);

LLVM_DEBUG(VEC_DBGS() << "Inferred iteration size '" << dimSize
<< (dimScalable ? " x vscale" : "")
<< "' for dimension '" << dim << "'\n");
}

if (opResult) {
assert(!llvm::is_contained(result.vectorScalableFlags, true) &&
"inferring scalable bounds with `opResult` not supported!");
result.destShape = linalgOp.getIndexingMapMatchingResult(opResult.value())
.compose(result.vectorSizes);
}
Expand Down Expand Up @@ -244,12 +262,14 @@ getVectorSizes(Operation *op, bool useConfiguredVectorSizes) {

// Try to infer the vector sizes from the IR.
std::optional<SmallVector<int64_t>> vectorSizes;
SmallVector<bool> scalableFlags;
TypeSwitch<Operation *, void>(op)
.Case<linalg::LinalgOp>([&](linalg::LinalgOp linalgOp) {
std::optional<VectorizationTileSizes> result =
inferSizesFromIR(linalgOp, /*opResult=*/std::nullopt);
if (result) {
vectorSizes = result->vectorSizes;
scalableFlags = result->vectorScalableFlags;
}
})
.Case<tensor::PackOp, tensor::UnPackOp>([&](auto op) {
Expand All @@ -269,9 +289,8 @@ getVectorSizes(Operation *op, bool useConfiguredVectorSizes) {
.Default([&](Operation *) {});

if (vectorSizes) {
// This can't identify scalable flags, so pad them with `false`.
return std::make_pair(vectorSizes.value(),
SmallVector<bool>(vectorSizes->size(), false));
scalableFlags.resize(vectorSizes->size(), false);
return std::make_pair(vectorSizes.value(), scalableFlags);
}
return std::nullopt;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -366,3 +366,42 @@ func.func @generic_unpack_infer_vector_size(%arg0: tensor<?x?x16x16xf32>, %arg1:
// CHECK-MASK: %[[GENERIC_SRC:.+]] = vector.transfer_read %[[UNPACK_WRITE]]{{.+}}, %[[GENERIC_MASK]]
// CHECK-MASK: %[[EXP:.+]] = math.exp %[[GENERIC_SRC]]
// CHECK-MASK: vector.transfer_write %[[EXP]]{{.+}}, %[[GENERIC_MASK]]

// -----

#aarch64_sve = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve", target_triple = "aarch64-none-elf"}>
#map = affine_map<()[s0] -> (-(176 mod s0) + 176)>

// Fills a 1x1x4x(4*vscale) tile per innermost iteration; exercises inferring a
// scalable vector size (the dynamic `%c4_vscale` extent) for linalg.fill on an
// SVE target.
func.func @dynamic_fill_with_scalable_tiling_infer_vector_size(%arg0: tensor<1x67x120x176xf32>) -> tensor<1x67x120x176xf32>
  attributes {hal.executable.target = #aarch64_sve}
{
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  %c67 = arith.constant 67 : index
  %c120 = arith.constant 120 : index
  %cst = arith.constant 0.000000e+00 : f32
  // Scalable inner tile size: 4 * vscale.
  %vscale = vector.vscale
  %c4_vscale = arith.muli %vscale, %c4 : index
  %0 = scf.for %arg1 = %c0 to %c67 step %c1 iter_args(%arg2 = %arg0) -> (tensor<1x67x120x176xf32>) {
    %1 = scf.for %arg3 = %c0 to %c120 step %c4 iter_args(%arg4 = %arg2) -> (tensor<1x67x120x176xf32>) {
      // #map clamps the innermost trip count to a multiple of the scalable
      // step, so every extracted slice is a full 4*vscale wide.
      %2 = affine.apply #map()[%c4_vscale]
      %3 = scf.for %arg5 = %c0 to %2 step %c4_vscale iter_args(%arg6 = %arg4) -> (tensor<1x67x120x176xf32>) {
        %extracted_slice = tensor.extract_slice %arg6[0, %arg1, %arg3, %arg5] [1, 1, 4, %c4_vscale] [1, 1, 1, 1] : tensor<1x67x120x176xf32> to tensor<1x1x4x?xf32>
        %4 = linalg.fill ins(%cst : f32) outs(%extracted_slice : tensor<1x1x4x?xf32>) -> tensor<1x1x4x?xf32>
        %inserted_slice = tensor.insert_slice %4 into %arg6[0, %arg1, %arg3, %arg5] [1, 1, 4, %c4_vscale] [1, 1, 1, 1] : tensor<1x1x4x?xf32> into tensor<1x67x120x176xf32>
        scf.yield %inserted_slice : tensor<1x67x120x176xf32>
      }
      scf.yield %3 : tensor<1x67x120x176xf32>
    }
    scf.yield %1 : tensor<1x67x120x176xf32>
  }
  return %0 : tensor<1x67x120x176xf32>
}

// CHECK-MASK-LABEL: func.func @dynamic_fill_with_scalable_tiling_infer_vector_size
// CHECK-MASK: %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<1x1x4x[4]xf32>
// CHECK-MASK: scf.for
// CHECK-MASK: scf.for
// CHECK-MASK: scf.for
// CHECK-MASK: vector.transfer_write %[[CST]], {{.*}} {in_bounds = [true, true, true, true]} : vector<1x1x4x[4]xf32>, tensor<1x1x4x?xf32>
22 changes: 22 additions & 0 deletions compiler/src/iree/compiler/Codegen/Utils/Utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1157,4 +1157,26 @@ getDefaultVscaleRange(IREE::HAL::ExecutableTargetAttr targetAttr) {
return std::nullopt;
}

/// Computes an upper bound for dimension `dimNum` of the shaped value
/// `shapedValue`. If `vscaleRange` is provided, the scalable value-bounds
/// analysis is used and the returned bound may be a scalable quantity
/// (base size x vscale); otherwise only a constant upper bound is attempted.
/// Returns failure if no bound could be computed.
FailureOr<DimBoundSize>
computeDimUpperBound(Value shapedValue, unsigned dimNum,
                     std::optional<VscaleRange> vscaleRange) {
  if (vscaleRange.has_value()) {
    // Scalable path: this analysis can express bounds of the form
    // `base * vscale` given the target's vscale range.
    auto scalableBound =
        vector::ScalableValueBoundsConstraintSet::computeScalableBound(
            shapedValue, dimNum,
            /*vscaleMin=*/vscaleRange->min,
            /*vscaleMax=*/vscaleRange->max, presburger::BoundType::UB);
    if (failed(scalableBound))
      return failure();
    return scalableBound->getSize();
  }
  // Fixed-size path: fall back to a plain constant upper bound.
  auto constantBound = ValueBoundsConstraintSet::computeConstantBound(
      presburger::BoundType::UB, {shapedValue, dimNum},
      /*stopCondition=*/nullptr, /*closedUB=*/true);
  if (failed(constantBound))
    return failure();
  return DimBoundSize{.baseSize = *constantBound, .scalable = false};
}

} // namespace mlir::iree_compiler
11 changes: 11 additions & 0 deletions compiler/src/iree/compiler/Codegen/Utils/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/SCF/Transforms/TileUsingInterface.h"
#include "mlir/Dialect/Vector/IR/ScalableValueBoundsConstraintSet.h"
#include "mlir/IR/Dominance.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/PatternMatch.h"
Expand Down Expand Up @@ -233,6 +234,16 @@ struct VscaleRange {
std::optional<VscaleRange>
getDefaultVscaleRange(IREE::HAL::ExecutableTargetAttr targetAttr);

using DimBound = vector::ConstantOrScalableBound;
using DimBoundSize = DimBound::BoundSize;

/// Computes the upper bound of `dimNum` dim of the ShapedType value
/// `shapedValue`. If the optional `vscaleRange` is provided then the computed
/// bound can be a scalable quantity.
FailureOr<DimBoundSize>
computeDimUpperBound(Value shapedValue, unsigned dimNum,
std::optional<VscaleRange> vscaleRange);

} // namespace mlir::iree_compiler

#endif // IREE_COMPILER_CODEGEN_UTILS_UTILS_H_

0 comments on commit cfc79ea

Please sign in to comment.