From cfc79eaddf22ac9a69b1557722f905928972fca5 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Fri, 19 Jul 2024 14:03:11 +0100 Subject: [PATCH] [Codegen] Support inferring scalable vector sizes (#17891) This patch extends generic vectorization to support inferring scalable vector sizes for linalg ops (using the ScalableValueBoundsConstraintSet). Note: Inferring scalable sizes for tensor.pack/unpack is not supported. --------- Signed-off-by: Benjamin Maxwell --- .../Codegen/Common/GenericVectorization.cpp | 45 +++++++++++++------ .../Common/test/generic_vectorization.mlir | 39 ++++++++++++++++ .../src/iree/compiler/Codegen/Utils/Utils.cpp | 22 +++++++++ .../src/iree/compiler/Codegen/Utils/Utils.h | 11 +++++ 4 files changed, 104 insertions(+), 13 deletions(-) diff --git a/compiler/src/iree/compiler/Codegen/Common/GenericVectorization.cpp b/compiler/src/iree/compiler/Codegen/Common/GenericVectorization.cpp index 16a04bb308a2..e36a9c789092 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GenericVectorization.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/GenericVectorization.cpp @@ -29,6 +29,7 @@ namespace { struct VectorizationTileSizes { SmallVector destShape; SmallVector vectorSizes; + SmallVector vectorScalableFlags; }; /// Returns a VectorizationTileSizes which contains the inferred bounded result @@ -41,13 +42,25 @@ static std::optional inferSizesFromIR(Value val); /// Returns std::nullopt if vector sizes can't be inferred. 
static std::optional inferSizesFromIR(linalg::LinalgOp linalgOp, std::optional opResult) { - LLVM_DEBUG(VEC_DBGS() << "Inferring sizes for:\n" - << linalgOp << " with OpResult.resultNumber=" - << opResult->getResultNumber() << "\n"); + LLVM_DEBUG({ + VEC_DBGS() << "Inferring sizes for:\n" << linalgOp; + if (opResult) { + VEC_DBGS() << " with OpResult.resultNumber=" + << opResult->getResultNumber(); + } + VEC_DBGS() << '\n'; + }); + + std::optional vscaleRange; + if (!opResult) { + // Note: Inferring scalable sizes is not supported if `opResult` is set + // (which is used to compute sizes for tensor.pack/unpack). + auto targetAttr = IREE::HAL::ExecutableTargetAttr::lookup(linalgOp); + vscaleRange = getDefaultVscaleRange(targetAttr); + } VectorizationTileSizes result; unsigned numDims = linalgOp.getNumLoops(); - for (int dim = 0; dim < numDims; ++dim) { // Map dimension `dim` to an operand dimension that we will use to // traverse the U-D chain to get `dim` vector size information. @@ -63,22 +76,21 @@ inferSizesFromIR(linalg::LinalgOp linalgOp, std::optional opResult) { // Trivial case: `dim` size is available in the operand type. int64_t dimSize = llvm::cast(firstOperand.getType()) .getShape()[firstOperandDim]; + bool dimScalable = false; if (!ShapedType::isDynamic(dimSize)) { result.vectorSizes.push_back(dimSize); + result.vectorScalableFlags.push_back(dimScalable); LLVM_DEBUG(VEC_DBGS() << "Inferred iteration size '" << dimSize << "' for dimension '" << dim << "'\n"); continue; } // Use ValueBounds analysis to infer `dim` size upper bound. 
- FailureOr maybeDimBound; + FailureOr maybeDimBound; for (auto operandDimPair : operandDimPairs) { Value operand = operandDimPair.first; unsigned operandDim = operandDimPair.second; - maybeDimBound = ValueBoundsConstraintSet::computeConstantBound( - presburger::BoundType::UB, {operand, operandDim}, - /*stopCondition=*/nullptr, /*closedUB=*/true); - + maybeDimBound = computeDimUpperBound(operand, operandDim, vscaleRange); if (succeeded(maybeDimBound)) { break; } @@ -88,13 +100,19 @@ inferSizesFromIR(linalg::LinalgOp linalgOp, std::optional opResult) { return std::nullopt; } - dimSize = maybeDimBound.value(); + dimSize = maybeDimBound->baseSize; + dimScalable = maybeDimBound->scalable; result.vectorSizes.push_back(dimSize); + result.vectorScalableFlags.push_back(dimScalable); + LLVM_DEBUG(VEC_DBGS() << "Inferred iteration size '" << dimSize + << (dimScalable ? " x vscale" : "") << "' for dimension '" << dim << "'\n"); } if (opResult) { + assert(!llvm::is_contained(result.vectorScalableFlags, true) && + "inferring scalable bounds with `opResult` not supported!"); result.destShape = linalgOp.getIndexingMapMatchingResult(opResult.value()) .compose(result.vectorSizes); } @@ -244,12 +262,14 @@ getVectorSizes(Operation *op, bool useConfiguredVectorSizes) { // Try to infer the vector sizes from the IR. std::optional> vectorSizes; + SmallVector scalableFlags; TypeSwitch(op) .Case([&](linalg::LinalgOp linalgOp) { std::optional result = inferSizesFromIR(linalgOp, /*opResult=*/std::nullopt); if (result) { vectorSizes = result->vectorSizes; + scalableFlags = result->vectorScalableFlags; } }) .Case([&](auto op) { @@ -269,9 +289,8 @@ getVectorSizes(Operation *op, bool useConfiguredVectorSizes) { .Default([&](Operation *) {}); if (vectorSizes) { - // This can't identify scalable flags, so pad them with `false`. 
- return std::make_pair(vectorSizes.value(), - SmallVector(vectorSizes->size(), false)); + scalableFlags.resize(vectorSizes->size(), false); + return std::make_pair(vectorSizes.value(), scalableFlags); } return std::nullopt; } diff --git a/compiler/src/iree/compiler/Codegen/Common/test/generic_vectorization.mlir b/compiler/src/iree/compiler/Codegen/Common/test/generic_vectorization.mlir index 6943348fd651..3f0947d43f91 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/generic_vectorization.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/generic_vectorization.mlir @@ -366,3 +366,42 @@ func.func @generic_unpack_infer_vector_size(%arg0: tensor, %arg1: // CHECK-MASK: %[[GENERIC_SRC:.+]] = vector.transfer_read %[[UNPACK_WRITE]]{{.+}}, %[[GENERIC_MASK]] // CHECK-MASK: %[[EXP:.+]] = math.exp %[[GENERIC_SRC]] // CHECK-MASK: vector.transfer_write %[[EXP]]{{.+}}, %[[GENERIC_MASK]] + +// ----- + +#aarch64_sve = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve", target_triple = "aarch64-none-elf"}> +#map = affine_map<()[s0] -> (-(176 mod s0) + 176)> + +func.func @dynamic_fill_with_scalable_tiling_infer_vector_size(%arg0: tensor<1x67x120x176xf32>) -> tensor<1x67x120x176xf32> + attributes {hal.executable.target = #aarch64_sve} +{ + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c4 = arith.constant 4 : index + %c67 = arith.constant 67 : index + %c120 = arith.constant 120 : index + %cst = arith.constant 0.000000e+00 : f32 + %vscale = vector.vscale + %c4_vscale = arith.muli %vscale, %c4 : index + %0 = scf.for %arg1 = %c0 to %c67 step %c1 iter_args(%arg2 = %arg0) -> (tensor<1x67x120x176xf32>) { + %1 = scf.for %arg3 = %c0 to %c120 step %c4 iter_args(%arg4 = %arg2) -> (tensor<1x67x120x176xf32>) { + %2 = affine.apply #map()[%c4_vscale] + %3 = scf.for %arg5 = %c0 to %2 step %c4_vscale iter_args(%arg6 = %arg4) -> (tensor<1x67x120x176xf32>) { + %extracted_slice = tensor.extract_slice %arg6[0, %arg1, %arg3, %arg5] [1, 
1, 4, %c4_vscale] [1, 1, 1, 1] : tensor<1x67x120x176xf32> to tensor<1x1x4x?xf32> + %4 = linalg.fill ins(%cst : f32) outs(%extracted_slice : tensor<1x1x4x?xf32>) -> tensor<1x1x4x?xf32> + %inserted_slice = tensor.insert_slice %4 into %arg6[0, %arg1, %arg3, %arg5] [1, 1, 4, %c4_vscale] [1, 1, 1, 1] : tensor<1x1x4x?xf32> into tensor<1x67x120x176xf32> + scf.yield %inserted_slice : tensor<1x67x120x176xf32> + } + scf.yield %3 : tensor<1x67x120x176xf32> + } + scf.yield %1 : tensor<1x67x120x176xf32> + } + return %0 : tensor<1x67x120x176xf32> +} + +// CHECK-MASK-LABEL: func.func @dynamic_fill_with_scalable_tiling_infer_vector_size +// CHECK-MASK: %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<1x1x4x[4]xf32> +// CHECK-MASK: scf.for +// CHECK-MASK: scf.for +// CHECK-MASK: scf.for +// CHECK-MASK: vector.transfer_write %[[CST]], {{.*}} {in_bounds = [true, true, true, true]} : vector<1x1x4x[4]xf32>, tensor<1x1x4x?xf32> diff --git a/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp b/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp index abeb35d83553..5db070b79c08 100644 --- a/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp +++ b/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp @@ -1157,4 +1157,26 @@ getDefaultVscaleRange(IREE::HAL::ExecutableTargetAttr targetAttr) { return std::nullopt; } +FailureOr +computeDimUpperBound(Value shapedValue, unsigned dimNum, + std::optional vscaleRange) { + if (!vscaleRange.has_value()) { + FailureOr maybeDimBoundSize = + ValueBoundsConstraintSet::computeConstantBound( + presburger::BoundType::UB, {shapedValue, dimNum}, + /*stopCondition=*/nullptr, /*closedUB=*/true); + if (succeeded(maybeDimBoundSize)) + return DimBoundSize{.baseSize = *maybeDimBoundSize, .scalable = false}; + return failure(); + } + FailureOr maybeDimBound = + vector::ScalableValueBoundsConstraintSet::computeScalableBound( + shapedValue, dimNum, + /*vscaleMin=*/vscaleRange->min, + /*vscaleMax=*/vscaleRange->max, presburger::BoundType::UB); + if 
(succeeded(maybeDimBound)) + return maybeDimBound->getSize(); + return failure(); +} + } // namespace mlir::iree_compiler diff --git a/compiler/src/iree/compiler/Codegen/Utils/Utils.h b/compiler/src/iree/compiler/Codegen/Utils/Utils.h index 6aa01a3d561c..ae53df006a81 100644 --- a/compiler/src/iree/compiler/Codegen/Utils/Utils.h +++ b/compiler/src/iree/compiler/Codegen/Utils/Utils.h @@ -13,6 +13,7 @@ #include "mlir/Dialect/Linalg/Utils/Utils.h" #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Dialect/SCF/Transforms/TileUsingInterface.h" +#include "mlir/Dialect/Vector/IR/ScalableValueBoundsConstraintSet.h" #include "mlir/IR/Dominance.h" #include "mlir/IR/OpDefinition.h" #include "mlir/IR/PatternMatch.h" @@ -233,6 +234,16 @@ struct VscaleRange { std::optional getDefaultVscaleRange(IREE::HAL::ExecutableTargetAttr targetAttr); +using DimBound = vector::ConstantOrScalableBound; +using DimBoundSize = DimBound::BoundSize; + +/// Computes the upper bound of `dimNum` dim of the ShapedType value +/// `shapedValue`. If the optional `vscaleRange` is provided then the computed +/// bound can be a scalable quantity. +FailureOr +computeDimUpperBound(Value shapedValue, unsigned dimNum, + std::optional vscaleRange); + } // namespace mlir::iree_compiler #endif // IREE_COMPILER_CODEGEN_UTILS_UTILS_H_