diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/Patterns.cpp b/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/Patterns.cpp
index 02939d8eaf2c..d60e6b19c447 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/Patterns.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/Patterns.cpp
@@ -782,17 +782,11 @@ struct ConvertDispatchOp
       IREE::Flow::DispatchOp op, OneToNOpAdaptor adaptor,
       IREE::Stream::AffinityAttr executionAffinityAttr,
       ConversionPatternRewriter &rewriter) const override {
-    // Zero is going to be used for each operand to start.
-    auto zeroOffset = rewriter.create<arith::ConstantIndexOp>(op.getLoc(), 0);
-
     // Query and resolve all operands and their sizes.
-    SmallVector<Value> dispatchOperands;
-    SmallVector<Value> dispatchOperandSizes;
-    SmallVector<Value> dispatchOperandOffsets;
-    SmallVector<Value> dispatchOperandEnds;
-    SmallVector<Value> dispatchOperandLengths;
+    SmallVector<Value> operands;
     SmallVector<Value> operandSizes;
-
+    SmallVector<Value> allOperandSizes;
+    SmallVector<Type> operandEncodings;
     for (auto [oldOperand, convertedOperands] :
          llvm::zip_equal(op.getArguments(), adaptor.getArguments())) {
       Value newOperand;
@@ -801,34 +795,36 @@ struct ConvertDispatchOp
             transferTensorOperands(op.getLoc(), oldOperand, convertedOperands,
                                    executionAffinityAttr, rewriter);
         newOperand = newOperandCast.resource;
-        dispatchOperandSizes.push_back(newOperandCast.resourceSize);
         operandSizes.push_back(newOperandCast.resourceSize);
-        dispatchOperandOffsets.push_back(zeroOffset);
-        dispatchOperandEnds.push_back(newOperandCast.resourceSize);
-        dispatchOperandLengths.push_back(newOperandCast.resourceSize);
+        allOperandSizes.push_back(newOperandCast.resourceSize);
+        operandEncodings.push_back(oldOperand.getType());
       } else {
-        operandSizes.push_back({});
+        allOperandSizes.push_back({});
+        operandEncodings.push_back(rewriter.getType<IREE::Util::UnusedType>());
         newOperand = convertedOperands.front();
       }
-      dispatchOperands.push_back(newOperand);
+      operands.push_back(newOperand);
     }

     // Construct result sizes or reuse tied operand sizes from above.
     SmallVector<Value> resultSizes;
     SmallVector<Type> resultTypes;
+    SmallVector<Type> resultEncodings;
     auto unknownType = rewriter.getType<IREE::Stream::ResourceType>();
     auto tiedOperandBase = op.getTiedOperandsIndexAndLength().first;
     for (auto result : llvm::enumerate(op.getResults())) {
       auto oldResultType = result.value().getType();
       if (!llvm::isa<ShapedType>(oldResultType)) {
         resultTypes.push_back(getTypeConverter()->convertType(oldResultType));
+        resultEncodings.push_back(rewriter.getType<IREE::Util::UnusedType>());
         continue;
       }
       auto tiedOperand = op.getTiedResultOperandIndex(result.index());
       if (tiedOperand.has_value()) {
         auto operandIndex = tiedOperand.value() - tiedOperandBase;
-        resultSizes.push_back(operandSizes[operandIndex]);
-        resultTypes.push_back(dispatchOperands[operandIndex].getType());
+        resultSizes.push_back(allOperandSizes[operandIndex]);
+        resultTypes.push_back(operands[operandIndex].getType());
+        resultEncodings.push_back(operandEncodings[operandIndex]);
       } else {
         auto resultDynamicDims = IREE::Util::buildDynamicDimsForValue(
             op.getLoc(), result.value(), rewriter);
@@ -836,15 +832,21 @@ struct ConvertDispatchOp
             buildResultSizeOf(op.getLoc(), result.value(), resultDynamicDims,
                               executionAffinityAttr, rewriter));
         resultTypes.push_back(unknownType);
+        resultEncodings.push_back(oldResultType);
       }
     }

-    auto newOp = rewriter.create<IREE::Stream::AsyncDispatchOp>(
+    auto newOp = rewriter.create<IREE::Stream::TensorDispatchOp>(
         op.getLoc(), resultTypes, flattenValues(adaptor.getWorkload()),
-        adaptor.getEntryPointsAttr(), dispatchOperands, dispatchOperandSizes,
-        dispatchOperandOffsets, dispatchOperandEnds, dispatchOperandLengths,
-        resultSizes, adaptor.getTiedOperandsAttr(), executionAffinityAttr);
-    newOp->setDialectAttrs(op->getDialectAttrs());
+        adaptor.getEntryPointsAttr(), operands, operandSizes,
+        rewriter.getTypeArrayAttr(operandEncodings), op.getArgumentDims(),
+        resultSizes, rewriter.getTypeArrayAttr(resultEncodings),
+        op.getResultDims(), adaptor.getTiedOperandsAttr(),
+        executionAffinityAttr);
+    newOp->setDialectAttrs(
+        llvm::make_filter_range(op->getDialectAttrs(), [](NamedAttribute attr) {
+          return attr.getName() != "stream.affinity";
+        }));
     SmallVector<SmallVector<Value>> replacementsVec = llvm::map_to_vector(
         llvm::zip_equal(newOp->getResults(), resultSizes), [](auto it) {
           return SmallVector<Value>{std::get<0>(it), std::get<1>(it)};
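For orientation, a minimal before/after sketch of the conversion above, assembled from the tests that follow (IR abbreviated; %input_size stands for the converted resource size and is not a name from the patch):

    // Before: flow-level dispatch carrying tensor shapes and dynamic dims.
    %0 = flow.dispatch @ex::@entry(%input) : (tensor<7x?x24x?xf32>{%dim1, %dim3}) -> tensor<?x?x1024xf32>{%dim1, %dim3}
    // After: encodings and dims ride on stream.tensor.dispatch directly,
    // replacing the zero offset/end/length lists async.dispatch needed.
    %result_size = stream.tensor.sizeof tensor<?x?x1024xf32>{%dim1, %dim3} : index
    %result = stream.tensor.dispatch @ex::@entry(%input) : (tensor<7x?x24x?xf32>{%dim1, %dim3} in !stream.resource<*>{%input_size}) -> tensor<?x?x1024xf32>{%dim1, %dim3} in !stream.resource<*>{%result_size}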
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/test/dispatch_ops.mlir b/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/test/dispatch_ops.mlir
index 063389fb4dfa..bd9bbc848db1 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/test/dispatch_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/test/dispatch_ops.mlir
@@ -3,11 +3,11 @@
 // CHECK-LABEL: @dispatchNoWorkload
 // CHECK-SAME: (%[[INPUT:.+]]: !stream.resource<*>, %[[INPUT_SIZE:.+]]: index, %[[DIM1:.+]]: index, %[[DIM3:.+]]: index)
 util.func public @dispatchNoWorkload(%input: tensor<7x?x24x?xf32>, %dim1: index, %dim3: index) -> tensor<?x?x1024xf32> {
-  // CHECK: %[[RESULT_SIZE:.+]] = stream.tensor.sizeof tensor<?x?x1024xf32>{%[[DIM1]], %[[DIM3]]}
-  // CHECK: %[[RESULT:.+]] = stream.async.dispatch @ex::@entry(%[[INPUT]][%c0 to %[[INPUT_SIZE]] for %[[INPUT_SIZE]]]) :
-  // CHECK-SAME: (!stream.resource<*>{%[[INPUT_SIZE]]}) -> !stream.resource<*>{%[[RESULT_SIZE]]}
+  // CHECK: %[[RESULT_SIZE:.+]] = stream.tensor.sizeof tensor<?x?x1024xf32>{%[[DIM1]], %[[DIM3]]}
+  // CHECK: %[[RESULT:.+]] = stream.tensor.dispatch @ex::@entry(%[[INPUT]]) :
+  // CHECK-SAME: (tensor<7x?x24x?xf32>{%[[DIM1]], %[[DIM3]]} in !stream.resource<*>{%[[INPUT_SIZE]]}) -> tensor<?x?x1024xf32>{%[[DIM1]], %[[DIM3]]} in !stream.resource<*>{%[[RESULT_SIZE]]}
   %0 = flow.dispatch @ex::@entry(%input) : (tensor<7x?x24x?xf32>{%dim1, %dim3}) -> tensor<?x?x1024xf32>{%dim1, %dim3}
-  // return %[[RESULT]], %[[RESULT_SIZE]] : !stream.resource<*>, index
+  // CHECK: util.return %[[RESULT]], %[[RESULT_SIZE]] : !stream.resource<*>, index
   util.return %0 : tensor<?x?x1024xf32>
 }

@@ -15,16 +15,17 @@ util.func public @dispatchNoWorkload(%input: tensor<7x?x24x?xf32>, %dim1: index,
 // CHECK-LABEL: @dispatch
 // CHECK-SAME: (%[[INPUT:.+]]: !stream.resource<*>, %[[INPUT_SIZE:.+]]: index, %[[DIM1:.+]]: index, %[[DIM3:.+]]: index)
-util.func public @dispatch(%input: tensor<7x?x24x?xf32>, %dim1: index, %dim3: index) -> tensor<?x?x1024xf32> {
+util.func public @dispatch(%input: tensor<7x?x24x?xf32>, %dim1: index, %dim3: index) -> (tensor<?x?x1024xf32>, tensor<1024x?x?xf32>) {
   %c1 = arith.constant 1 : index
   %c2 = arith.constant 2 : index
   %c3 = arith.constant 3 : index
-  // CHECK: %[[RESULT_SIZE:.+]] = stream.tensor.sizeof tensor<?x?x1024xf32>{%[[DIM1]], %[[DIM3]]}
-  // CHECK: %[[RESULT:.+]] = stream.async.dispatch @ex::@entry[%c1, %c2, %c3](%[[INPUT]][%c0 to %[[INPUT_SIZE]] for %[[INPUT_SIZE]]]) :
-  // CHECK-SAME: (!stream.resource<*>{%[[INPUT_SIZE]]}) -> !stream.resource<*>{%[[RESULT_SIZE]]}
-  %0 = flow.dispatch @ex::@entry[%c1, %c2, %c3](%input) : (tensor<7x?x24x?xf32>{%dim1, %dim3}) -> tensor<?x?x1024xf32>{%dim1, %dim3}
-  // return %[[RESULT]], %[[RESULT_SIZE]] : !stream.resource<*>, index
-  util.return %0 : tensor<?x?x1024xf32>
+  // CHECK: %[[RESULT0_SIZE:.+]] = stream.tensor.sizeof tensor<?x?x1024xf32>{%[[DIM1]], %[[DIM3]]}
+  // CHECK: %[[RESULT1_SIZE:.+]] = stream.tensor.sizeof tensor<1024x?x?xf32>{%[[DIM3]], %[[DIM1]]}
+  // CHECK: %[[RESULTS:.+]]:2 = stream.tensor.dispatch @ex::@entry[%c1, %c2, %c3](%[[INPUT]]) :
+  // CHECK-SAME: (tensor<7x?x24x?xf32>{%[[DIM1]], %[[DIM3]]} in !stream.resource<*>{%[[INPUT_SIZE]]}) -> (tensor<?x?x1024xf32>{%[[DIM1]], %[[DIM3]]} in !stream.resource<*>{%[[RESULT0_SIZE]]}, tensor<1024x?x?xf32>{%[[DIM3]], %[[DIM1]]} in !stream.resource<*>{%[[RESULT1_SIZE]]})
+  %results:2 = flow.dispatch @ex::@entry[%c1, %c2, %c3](%input) : (tensor<7x?x24x?xf32>{%dim1, %dim3}) -> (tensor<?x?x1024xf32>{%dim1, %dim3}, tensor<1024x?x?xf32>{%dim3, %dim1})
+  // CHECK: util.return %[[RESULTS]]#0, %[[RESULT0_SIZE]], %[[RESULTS]]#1, %[[RESULT1_SIZE]] : !stream.resource<*>, index, !stream.resource<*>, index
+  util.return %results#0, %results#1 : tensor<?x?x1024xf32>, tensor<1024x?x?xf32>
 }

 // -----

@@ -36,9 +37,11 @@ util.func public @tiedDispatch(%input0: tensor<i32>, %input1: tensor<2x3xi32>) -
   %c2 = arith.constant 2 : index
   %c3 = arith.constant 3 : index
   // CHECK: %[[T_SIZE:.+]] = stream.tensor.sizeof tensor<3x9xi32> : index
-  // CHECK: %[[T:.+]] = stream.async.dispatch @ex::@entry0[%c1, %c2, %c3](%[[INPUT0]][%c0 to %[[INPUT0_SIZE]] for %[[INPUT0_SIZE]]]) : (!stream.resource<*>{%[[INPUT0_SIZE]]}) -> !stream.resource<*>{%[[T_SIZE]]}
+  // CHECK: %[[T:.+]] = stream.tensor.dispatch @ex::@entry0[%c1, %c2, %c3](%[[INPUT0]]) :
+  // CHECK-SAME: (tensor<i32> in !stream.resource<*>{%[[INPUT0_SIZE]]}) -> tensor<3x9xi32> in !stream.resource<*>{%[[T_SIZE]]}
   %0 = flow.dispatch @ex::@entry0[%c1, %c2, %c3](%input0) : (tensor<i32>) -> tensor<3x9xi32>
-  // CHECK: %[[RESULT:.+]] = stream.async.dispatch @ex::@entry1[%c1, %c2, %c3](%[[INPUT1]][%c0 to %[[INPUT1_SIZE]] for %[[INPUT1_SIZE]]], %[[T]][%c0 to %[[T_SIZE]] for %[[T_SIZE]]]) : (!stream.resource<*>{%[[INPUT1_SIZE]]}, !stream.resource<*>{%[[T_SIZE]]}) -> %[[T]]{%[[T_SIZE]]}
+  // CHECK: %[[RESULT:.+]] = stream.tensor.dispatch @ex::@entry1[%c1, %c2, %c3](%[[INPUT1]], %[[T]]) :
+  // CHECK-SAME: (tensor<2x3xi32> in !stream.resource<*>{%[[INPUT1_SIZE]]}, tensor<3x9xi32> in !stream.resource<*>{%[[T_SIZE]]}) -> tensor<3x9xi32> in %[[T]]{%[[T_SIZE]]}
   %1 = flow.dispatch @ex::@entry1[%c1, %c2, %c3](%input1, %0) : (tensor<2x3xi32>, tensor<3x9xi32>) -> %0
   // CHECK: util.return %[[RESULT]], %[[T_SIZE]] : !stream.resource<*>, index
   util.return %1 : tensor<3x9xi32>

@@ -52,18 +55,20 @@ util.global private @device_b : !hal.device
 // CHECK-LABEL: @dispatchAffinity
 // CHECK-SAME: (%[[INPUT:.+]]: !stream.resource<*>, %[[INPUT_SIZE:.+]]: index, %[[DIM1:.+]]: index, %[[DIM3:.+]]: index)
 util.func public @dispatchAffinity(%input: tensor<7x?x24x?xf32>, %dim1: index, %dim3: index) -> (tensor<?x?x1024xf32>, tensor<?x?x1024xf32>) {
-  // CHECK: %[[INPUT_A:.+]] = stream.async.transfer %[[INPUT]] : !stream.resource<*>{%[[INPUT_SIZE]]} -> to(#hal.device.affinity<@device_a>) !stream.resource<*>{%[[INPUT_SIZE]]}
-  // CHECK: %[[RESULT0_SIZE:.+]] = stream.tensor.sizeof on(#hal.device.affinity<@device_a>) tensor<?x?x1024xf32>{%[[DIM1]], %[[DIM3]]}
-  // CHECK: %[[RESULT0:.+]] = stream.async.dispatch on(#hal.device.affinity<@device_a>) @ex::@entry0(%[[INPUT_A]][%c0 to %[[INPUT_SIZE]] for %[[INPUT_SIZE]]])
+  // CHECK: %[[INPUT_A:.+]] = stream.async.transfer %[[INPUT]] : !stream.resource<*>{%[[INPUT_SIZE]]} -> to(#hal.device.affinity<@device_a>) !stream.resource<*>{%[[INPUT_SIZE]]}
+  // CHECK: %[[RESULT0_SIZE:.+]] = stream.tensor.sizeof on(#hal.device.affinity<@device_a>) tensor<?x?x1024xf32>{%[[DIM1]], %[[DIM3]]}
+  // CHECK: %[[RESULT0:.+]] = stream.tensor.dispatch on(#hal.device.affinity<@device_a>) @ex::@entry0(%[[INPUT_A]])
+  // CHECK-SAME: (tensor<7x?x24x?xf32>{%[[DIM1]], %[[DIM3]]} in !stream.resource<*>{%[[INPUT_SIZE]]}) -> tensor<?x?x1024xf32>{%[[DIM1]], %[[DIM3]]} in !stream.resource<*>{%[[RESULT0_SIZE]]}
   %0 = flow.dispatch @ex::@entry0(%input) {
     stream.affinity = #hal.device.affinity<@device_a>
   } : (tensor<7x?x24x?xf32>{%dim1, %dim3}) -> tensor<?x?x1024xf32>{%dim1, %dim3}
-  // CHECK: %[[INPUT_B:.+]] = stream.async.transfer %[[INPUT]] : !stream.resource<*>{%[[INPUT_SIZE]]} -> to(#hal.device.affinity<@device_b>) !stream.resource<*>{%[[INPUT_SIZE]]}
-  // CHECK: %[[RESULT1_SIZE:.+]] = stream.tensor.sizeof on(#hal.device.affinity<@device_b>) tensor<?x?x1024xf32>{%[[DIM3]], %[[DIM1]]}
-  // CHECK: %[[RESULT1:.+]] = stream.async.dispatch on(#hal.device.affinity<@device_b>) @ex::@entry1(%[[INPUT_B]][%c0 to %[[INPUT_SIZE]] for %[[INPUT_SIZE]]])
+  // CHECK: %[[INPUT_B:.+]] = stream.async.transfer %[[INPUT]] : !stream.resource<*>{%[[INPUT_SIZE]]} -> to(#hal.device.affinity<@device_b>) !stream.resource<*>{%[[INPUT_SIZE]]}
+  // CHECK: %[[RESULT1_SIZE:.+]] = stream.tensor.sizeof on(#hal.device.affinity<@device_b>) tensor<?x?x1024xf32>{%[[DIM3]], %[[DIM1]]}
+  // CHECK: %[[RESULT1:.+]] = stream.tensor.dispatch on(#hal.device.affinity<@device_b>) @ex::@entry1(%[[INPUT_B]])
+  // CHECK-SAME: (tensor<7x?x24x?xf32>{%[[DIM1]], %[[DIM3]]} in !stream.resource<*>{%[[INPUT_SIZE]]}) -> tensor<?x?x1024xf32>{%[[DIM3]], %[[DIM1]]} in !stream.resource<*>{%[[RESULT1_SIZE]]}
   %1 = flow.dispatch @ex::@entry1(%input) {
     stream.affinity = #hal.device.affinity<@device_b>
   } : (tensor<7x?x24x?xf32>{%dim1, %dim3}) -> tensor<?x?x1024xf32>{%dim3, %dim1}
-  // return %[[RESULT0]], %[[RESULT0_SIZE]], %[[RESULT1]], %[[RESULT1_SIZE]]
+  // CHECK: return %[[RESULT0]], %[[RESULT0_SIZE]], %[[RESULT1]], %[[RESULT1_SIZE]]
   util.return %0, %1 : tensor<?x?x1024xf32>, tensor<?x?x1024xf32>
 }
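One behavior visible in the dispatchAffinity test above: the stream.affinity dialect attribute is consumed into the on(...) clause rather than copied onto the new op, which is what the make_filter_range call in Patterns.cpp implements. A rough sketch (operands and trailing types elided):

    %0 = flow.dispatch @ex::@entry0(%input) { stream.affinity = #hal.device.affinity<@device_a> } : ...
    // becomes, with the attribute folded into the affinity clause and dropped:
    %0 = stream.tensor.dispatch on(#hal.device.affinity<@device_a>) @ex::@entry0(%input) : ...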
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/test/tensor_ops.mlir b/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/test/tensor_ops.mlir
index df9e5480ef90..4f61917ed439 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/test/tensor_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/test/tensor_ops.mlir
@@ -141,16 +141,12 @@ util.global private @device : !hal.device
 // CHECK-LABEL: @tensorBarrierDispatch
 // CHECK-SAME: (%[[INPUT:.+]]: !stream.resource<*>, %[[DIM0:.+]]: index, %[[DIM1:.+]]: index)
 util.func public @tensorBarrierDispatch(%input: tensor<?xf32>, %dim0: index) -> tensor<?xf32> {
-  %c0 = arith.constant 0 : index
-  %barrier = flow.tensor.barrier %input : tensor<?xf32>{%dim0} on #hal.device.affinity<@device>
-  %0 = flow.dispatch @ex::@entry[%c0](%barrier) : (tensor<?xf32>{%dim0}) -> tensor<?xf32>{%dim0}
-
-  // CHECK: %[[C0:.+]] = arith.constant 0 : index
   // CHECK: %[[BARRIER:.+]] = stream.async.barrier %[[INPUT]] : !stream.resource<*>{%[[DIM0]]} -> !stream.resource<*>
-  // CHECK: %[[C0_2:.+]] = arith.constant 0 : index
+  %barrier = flow.tensor.barrier %input : tensor<?xf32>{%dim0} on #hal.device.affinity<@device>
   // CHECK: %[[SIZE:.+]] = stream.tensor.sizeof on(#hal.device.affinity<@device>) tensor<?xf32>{%arg2} : index
-  // CHECK: %[[DISP:.+]] = stream.async.dispatch on(#hal.device.affinity<@device>) @ex::@entry[%[[C0]]](%[[BARRIER]][%[[C0_2]] to %[[DIM0]] for %[[DIM0]]])
-  // CHECK: util.return %[[DISP]], %[[SIZE]]
+  // CHECK: %[[RESULT:.+]] = stream.tensor.dispatch on(#hal.device.affinity<@device>) @ex::@entry(%[[BARRIER]])
+  %0 = flow.dispatch @ex::@entry(%barrier) : (tensor<?xf32>{%dim0}) -> tensor<?xf32>{%dim0}
+  // CHECK: util.return %[[RESULT]], %[[SIZE]]
   util.return %0 : tensor<?xf32>
 }

diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOpFolders.cpp b/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOpFolders.cpp
index d973db0c34ca..619df9a79ece 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOpFolders.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOpFolders.cpp
@@ -1363,6 +1363,36 @@ void TensorStoreOp::getCanonicalizationPatterns(RewritePatternSet &results,
   // TODO(benvanik): combine multiple stores to the same target if contiguous.
 }

+//===----------------------------------------------------------------------===//
+// stream.tensor.dispatch
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+struct DeduplicateTensorDispatchEntryRefs final
+    : public OpRewritePattern<TensorDispatchOp> {
+  using OpRewritePattern<TensorDispatchOp>::OpRewritePattern;
+  LogicalResult matchAndRewrite(TensorDispatchOp dispatchOp,
+                                PatternRewriter &rewriter) const override {
+    auto originalAttr = dispatchOp.getEntryPointsAttr();
+    auto newAttr = deduplicateArrayElements(originalAttr);
+    if (newAttr == originalAttr)
+      return failure();
+    rewriter.modifyOpInPlace(dispatchOp,
+                             [&]() { dispatchOp.setEntryPointsAttr(newAttr); });
+    return success();
+  }
+};
+
+} // namespace
+
+void TensorDispatchOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                                   MLIRContext *context) {
+  // TODO(benvanik): maybe tied type/lifetime updates?
+  results.insert<ElideUnusedOp<TensorDispatchOp>>(context);
+  results.insert<DeduplicateTensorDispatchEntryRefs>(context);
+}
+
 //===----------------------------------------------------------------------===//
 // stream.async.alloca
 //===----------------------------------------------------------------------===//
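A sketch of the fold DeduplicateTensorDispatchEntryRefs performs, assuming the braced multi-entry-point syntax matches stream.async.dispatch (operand and type details elided):

    %0 = stream.tensor.dispatch {@ex::@entry, @ex::@entry}(%arg0) : ...
    // canonicalizes to:
    %0 = stream.tensor.dispatch @ex::@entry(%arg0) : ...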
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.cpp b/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.cpp
index c3bf0cf3546c..4623a7bd6c64 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.cpp
@@ -82,7 +82,7 @@ static LogicalResult verifyOpDynamicDims(Operation *op, TypeRange types,
                                          ValueRange dynamicDims) {
   unsigned requiredCount = 0;
   for (auto type : types) {
-    if (auto shapedType = llvm::dyn_cast<ShapedType>(type)) {
+    if (auto shapedType = llvm::dyn_cast_if_present<ShapedType>(type)) {
       requiredCount += shapedType.getNumDynamicDims();
     }
   }
@@ -95,6 +95,28 @@ static LogicalResult verifyOpDynamicDims(Operation *op, TypeRange types,
   return success();
 }

+// Verifies that |dynamicDims| contains the appropriate number of dims for all
+// the dynamic dimensions in |type|.
+static LogicalResult verifyOpDynamicDimsRange(Operation *op,
+                                              ArrayAttr typesAttr,
+                                              ValueRange dynamicDims) {
+  unsigned requiredCount = 0;
+  for (auto attr : typesAttr) {
+    if (auto typeAttr = dyn_cast_if_present<TypeAttr>(attr)) {
+      if (auto shapedType = llvm::dyn_cast<ShapedType>(typeAttr.getValue())) {
+        requiredCount += shapedType.getNumDynamicDims();
+      }
+    }
+  }
+  if (dynamicDims.size() != requiredCount) {
+    return op->emitOpError()
+           << "type set has " << requiredCount
+           << " dynamic dimensions but only " << dynamicDims.size()
+           << " dimension values are attached";
+  }
+  return success();
+}
+
 // Verifies that |sizes| contains the appropriate number of sizes for all of the
 // sized types in |values|.
 static LogicalResult verifyOpValueSizes(Operation *op, ValueRange values,
@@ -367,6 +389,375 @@ static void printEncodedResourceOperands(OpAsmPrinter &p, Operation *op,
   p.printNewline();
 }

+//===----------------------------------------------------------------------===//
+// custom<EncodedShapedTypeList>
+//===----------------------------------------------------------------------===//
+// encoding{%dim0, %dim1} in type{%size0}, type, type{%size1}
+
+static ParseResult
+parseShapedType(OpAsmParser &parser, Type &type,
+                SmallVectorImpl<OpAsmParser::UnresolvedOperand> &dims) {
+  if (failed(parser.parseType(type))) {
+    return failure();
+  }
+  if (auto shapedType = dyn_cast<ShapedType>(type)) {
+    if (!shapedType.hasStaticShape()) {
+      SmallVector<OpAsmParser::UnresolvedOperand> dynamicDims;
+      if (failed(parser.parseLBrace()) ||
+          failed(parser.parseOperandList(dynamicDims,
+                                         shapedType.getNumDynamicDims(),
+                                         OpAsmParser::Delimiter::None)) ||
+          failed(parser.parseRBrace())) {
+        return failure();
+      }
+      dims.append(dynamicDims);
+    }
+  } else if (isa<IREE::Util::SizeAwareTypeInterface>(type)) {
+    OpAsmParser::UnresolvedOperand size;
+    if (failed(parser.parseLBrace()) || failed(parser.parseOperand(size)) ||
+        failed(parser.parseRBrace())) {
+      return failure();
+    }
+    dims.push_back(size);
+  }
+  return success();
+}
+
+static void printSizedType(OpAsmPrinter &p, Operation *op, Type type,
+                           Value size) {
+  p.printType(type);
+  p << "{";
+  p.printOperand(size);
+  p << "}";
+}
+
+static OperandRange printShapedType(OpAsmPrinter &p, Operation *op, Type type,
+                                    OperandRange dims) {
+  p.printType(type);
+  if (auto shapedType = dyn_cast<ShapedType>(type)) {
+    if (!shapedType.hasStaticShape()) {
+      if (dims.empty()) {
+        p << "{<>}";
+        return dims;
+      }
+      p << "{";
+      llvm::interleaveComma(dims.take_front(shapedType.getNumDynamicDims()), p,
+                            [&](Value value) { p.printOperand(value); });
+      p << "}";
+      dims = dims.drop_front(shapedType.getNumDynamicDims());
+    }
+  } else if (isa<IREE::Util::SizeAwareTypeInterface>(type)) {
+    p << "{";
+    p.printOperand(dims.front());
+    p << "}";
+    dims = dims.drop_front(1);
+  }
+  return dims;
+}
+
+static ParseResult parseEncodedShapedTypeList(
+    OpAsmParser &parser, SmallVectorImpl<Type> &types,
+    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &sizes,
+    SmallVectorImpl<Type> &encodings,
+    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &encodingDims) {
+  do {
+    Type type0;
+    SmallVector<OpAsmParser::UnresolvedOperand> dims0;
+    if (failed(parseShapedType(parser, type0, dims0))) {
+      return failure();
+    }
+    if (succeeded(parser.parseOptionalKeyword("in"))) {
+      Type type1;
+      SmallVector<OpAsmParser::UnresolvedOperand> dims1;
+      if (failed(parseShapedType(parser, type1, dims1))) {
+        return failure();
+      }
+      types.push_back(type1);
+      sizes.append(dims1);
+      encodings.push_back(type0);
+      encodingDims.append(dims0);
+    } else {
+      types.push_back(type0);
+      sizes.append(dims0);
+      encodings.push_back(IREE::Util::UnusedType::get(parser.getContext()));
+    }
+  } while (succeeded(parser.parseOptionalComma()));
+  return success();
+}
+
+static void printEncodedShapedTypeList(OpAsmPrinter &p, Operation *op,
+                                       TypeRange types, OperandRange sizes,
+                                       ArrayAttr encodings,
+                                       OperandRange encodingDims) {
+  llvm::interleaveComma(
+      llvm::zip_equal(types, encodings.getAsValueRange<TypeAttr>()), p,
+      [&](std::tuple<Type, Type> it) {
+        auto [type, encoding] = it;
+        if (!isa<IREE::Util::UnusedType>(encoding)) {
+          encodingDims = printShapedType(p, op, encoding, encodingDims);
+          p << " in ";
+        }
+        sizes = printShapedType(p, op, type, sizes);
+      });
+}
+
+//===----------------------------------------------------------------------===//
+// custom<EncodedShapedResultList>
+//===----------------------------------------------------------------------===//
+// encoding{%dim0, %dim1} in type{%dim2}, type{%size}, %operand4
+//
+// Supported result formats:
+//   type{%size}
+//   %operand as type{%size}
+//   encoding{%dim0, %dim1} in %operand4
+//   encoding{%dim0, %dim1} in %operand4 as type{%size}
+
+static ParseResult parseEncodedShapedResultList(
+    OpAsmParser &parser, ArrayRef<OpAsmParser::UnresolvedOperand> operands,
+    TypeRange operandTypes,
+    ArrayRef<OpAsmParser::UnresolvedOperand> operandSizes,
+    SmallVectorImpl<Type> &resultTypes,
+    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &resultSizes,
+    SmallVectorImpl<Type> &resultEncodingTypes,
+    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &resultEncodingDims,
+    ArrayAttr &tiedOperands) {
+  SmallVector<int64_t> tiedOperandIndices;
+  do {
+    Type type0;
+    SmallVector<OpAsmParser::UnresolvedOperand> dims0;
+    auto typeResult = parser.parseOptionalType(type0);
+    if (typeResult.has_value() && succeeded(typeResult.value())) {
+      if (auto shapedType = dyn_cast<ShapedType>(type0)) {
+        if (!shapedType.hasStaticShape()) {
+          if (failed(parser.parseLBrace()) ||
+              failed(parser.parseOperandList(dims0,
+                                             shapedType.getNumDynamicDims(),
+                                             OpAsmParser::Delimiter::None)) ||
+              failed(parser.parseRBrace())) {
+            return failure();
+          }
+        }
+      } else if (auto sizedType =
+                     dyn_cast<IREE::Util::SizeAwareTypeInterface>(type0)) {
+        OpAsmParser::UnresolvedOperand size;
+        if (failed(parser.parseLBrace()) || failed(parser.parseOperand(size)) ||
+            failed(parser.parseRBrace())) {
+          return failure();
+        }
+        dims0.push_back(size);
+      }
+    }
+
+    // Type only:
+    if (failed(parser.parseOptionalKeyword("in"))) {
+      resultTypes.push_back(type0);
+      resultSizes.append(dims0);
+      resultEncodingTypes.push_back(
+          IREE::Util::UnusedType::get(parser.getContext()));
+      tiedOperandIndices.push_back(IREE::Util::TiedOpInterface::kUntiedIndex);
+      continue;
+    }
+
+    // Check for optional tied result reference.
+    OpAsmParser::UnresolvedOperand tiedResult;
+    auto res = parser.parseOptionalOperand(tiedResult);
+    Type resultType;
+    int64_t tiedOperandIndex = IREE::Util::TiedOpInterface::kUntiedIndex;
+    if (res.has_value() && succeeded(res.value())) {
+      tiedOperandIndex = findTiedOperand(tiedResult, operands);
+      if (tiedOperandIndex == IREE::Util::TiedOpInterface::kUntiedIndex) {
+        return parser.emitError(tiedResult.location,
+                                "tied operand not found for result reference ")
+               << tiedResult.name;
+      }
+      if (succeeded(parser.parseOptionalKeyword("as"))) {
+        // Type _may_ differ from the operand.
+        if (failed(parser.parseType(resultType))) {
+          return failure();
+        }
+      } else {
+        // Use the operands type.
+        resultType = operandTypes[tiedOperandIndex];
+      }
+    } else if (failed(parser.parseType(resultType))) {
+      return failure();
+    }
+
+    // Parse optional type dimensions (usually resource size here).
+    if (auto sizedType =
+            dyn_cast<IREE::Util::SizeAwareTypeInterface>(resultType)) {
+      OpAsmParser::UnresolvedOperand size;
+      if (failed(parser.parseLBrace()) || failed(parser.parseOperand(size)) ||
+          failed(parser.parseRBrace())) {
+        return failure();
+      }
+      resultSizes.push_back(size);
+    }
+
+    resultTypes.push_back(resultType);
+    resultEncodingTypes.push_back(type0);
+    resultEncodingDims.append(dims0);
+    tiedOperandIndices.push_back(tiedOperandIndex);
+  } while (succeeded(parser.parseOptionalComma()));
+  if (!tiedOperandIndices.empty()) {
+    tiedOperands = parser.getBuilder().getIndexArrayAttr(tiedOperandIndices);
+  }
+  return success();
+}
+
+static void printEncodedShapedResultList(
+    OpAsmPrinter &p, Operation *op, ValueRange operands, TypeRange operandTypes,
+    OperandRange operandSizes, TypeRange resultTypes, OperandRange resultSizes,
+    ArrayAttr resultEncodings, OperandRange resultEncodingDims,
+    ArrayAttr tiedOperands) {
+  auto tiedOp = dyn_cast<IREE::Util::TiedOpInterface>(op);
+  for (unsigned i = 0; i < resultTypes.size(); ++i) {
+    auto resultEncodingType =
+        cast<TypeAttr>(resultEncodings.getValue()[i]).getValue();
+    if (!isa<IREE::Util::UnusedType>(resultEncodingType)) {
+      p.printType(resultEncodingType);
+      if (auto shapedType = dyn_cast<ShapedType>(resultEncodingType)) {
+        if (!shapedType.hasStaticShape()) {
+          if (resultEncodingDims.empty()) {
+            p << "{<>}";
+            return;
+          }
+          p << "{";
+          llvm::interleaveComma(
+              resultEncodingDims.take_front(shapedType.getNumDynamicDims()), p,
+              [&](Value value) { p.printOperand(value); });
+          p << "}";
+          resultEncodingDims =
+              resultEncodingDims.drop_front(shapedType.getNumDynamicDims());
+        }
+      } else if (auto sizedType = dyn_cast<IREE::Util::SizeAwareTypeInterface>(
+                     resultEncodingType)) {
+        p << "{";
+        p.printOperand(resultEncodingDims.front());
+        p << "}";
+        resultEncodingDims = resultEncodingDims.drop_front(1);
+      }
+      p << " in ";
+    }
+    auto resultType = resultTypes[i];
+    auto tiedOperandIndex =
+        tiedOp ? tiedOp.getTiedResultOperandIndex(i) : std::nullopt;
+    bool printType = true;
+    if (tiedOperandIndex.has_value()) {
+      auto tiedOperand = op->getOperand(tiedOperandIndex.value());
+      p.printOperand(tiedOperand);
+      if (tiedOperand.getType() != resultType) {
+        p << " as ";
+      } else {
+        // Type elided as it matches the operand.
+        printType = false;
+      }
+    }
+    if (printType) {
+      p.printType(resultType);
+    }
+    if (auto sizedType =
+            dyn_cast<IREE::Util::SizeAwareTypeInterface>(resultType)) {
+      p << "{";
+      p.printOperand(resultSizes.front());
+      p << "}";
+      resultSizes = resultSizes.drop_front(1);
+    }
+    if (i < resultTypes.size() - 1) {
+      p << ", ";
+    }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// custom<EncodedShapedFunctionType>
+//===----------------------------------------------------------------------===//
+// (type, encoding{%dim0, %dim1} in type{%size}, type) ->
+//     (encoding{%dim} in type{%size}, %operand4)
+
+static ParseResult parseEncodedShapedFunctionType(
+    OpAsmParser &parser, ArrayRef<OpAsmParser::UnresolvedOperand> operands,
+    SmallVectorImpl<Type> &operandTypes,
+    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &operandSizes,
+    ArrayAttr &operandEncodings,
+    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &operandEncodingDims,
+    SmallVectorImpl<Type> &resultTypes,
+    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &resultSizes,
+    ArrayAttr &resultEncodings,
+    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &resultEncodingDims,
+    ArrayAttr &tiedOperands) {
+  SmallVector<Type> operandEncodingTypes;
+  SmallVector<Type> resultEncodingTypes;
+  if (failed(parser.parseLParen())) {
+    return failure();
+  }
+  if (failed(parser.parseOptionalRParen())) {
+    if (failed(parseEncodedShapedTypeList(parser, operandTypes, operandSizes,
                                          operandEncodingTypes,
+                                          operandEncodingDims)) ||
+        failed(parser.parseRParen())) {
+      return failure();
+    }
+  }
+  if (failed(parser.parseArrow())) {
+    return failure();
+  }
+  if (succeeded(parser.parseOptionalLParen())) {
+    if (succeeded(parser.parseOptionalRParen())) {
+      // Empty list/no results `()`.
+    } else {
+      // One or more result types.
+      if (failed(parseEncodedShapedResultList(
+              parser, operands, operandTypes, operandSizes, resultTypes,
+              resultSizes, resultEncodingTypes, resultEncodingDims,
+              tiedOperands)) ||
+          failed(parser.parseRParen())) {
+        return failure();
+      }
+    }
+  } else {
+    // Single result with omitted `()`.
+    if (failed(parseEncodedShapedResultList(
+            parser, operands, operandTypes, operandSizes, resultTypes,
+            resultSizes, resultEncodingTypes, resultEncodingDims,
+            tiedOperands))) {
+      return failure();
+    }
+  }
+  operandEncodings = ArrayAttr::get(
+      parser.getContext(),
+      llvm::map_to_vector(operandEncodingTypes, [](Type type) -> Attribute {
+        return type ? TypeAttr::get(type) : Attribute{};
+      }));
+  resultEncodings = ArrayAttr::get(
+      parser.getContext(),
+      llvm::map_to_vector(resultEncodingTypes, [](Type type) -> Attribute {
+        return TypeAttr::get(type);
+      }));
+  return success();
+}
+
+static void printEncodedShapedFunctionType(
+    OpAsmPrinter &p, Operation *op, ValueRange operands, TypeRange operandTypes,
+    OperandRange operandSizes, ArrayAttr operandEncodings,
+    OperandRange operandEncodingDims, TypeRange resultTypes,
+    OperandRange resultSizes, ArrayAttr resultEncodings,
+    OperandRange resultEncodingDims, ArrayAttr tiedOperands) {
+  p << "(";
+  printEncodedShapedTypeList(p, op, operandTypes, operandSizes,
+                             operandEncodings, operandEncodingDims);
+  p << ") -> ";
+  if (resultTypes.size() != 1) {
+    p << "(";
+  }
+  printEncodedShapedResultList(p, op, operands, operandTypes, operandSizes,
+                               resultTypes, resultSizes, resultEncodings,
+                               resultEncodingDims, tiedOperands);
+  if (resultTypes.size() != 1) {
+    p << ")";
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // custom<ParameterLoadOperations>(
 //     $source_scope, $source_keys, $source_offsets,
@@ -1704,6 +2095,70 @@ ValueRange TensorTraceOp::getResultDynamicDims(unsigned idx) {
   return ValueRange{};
 }

+//===----------------------------------------------------------------------===//
+// stream.tensor.dispatch
+//===----------------------------------------------------------------------===//
+
+LogicalResult TensorDispatchOp::verify() {
+  TensorDispatchOp op = *this;
+  if (failed(verifyOpValueSizes(op, op.getMixedOperands(),
+                                op.getOperandSizes())) ||
+      failed(verifyOpValueSizes(op, op.getResults(), op.getResultSizes()))) {
+    return failure();
+  }
+  if (failed(verifyOpDynamicDimsRange(op, op.getOperandEncodings(),
+                                      op.getOperandEncodingDims())) ||
+      failed(verifyOpDynamicDimsRange(op, op.getResultEncodings(),
+                                      op.getResultEncodingDims()))) {
+    return failure();
+  }
+  return success();
+}
+
+static LogicalResult
+verifyDispatchSymbolUses(Operation *op, ArrayAttr entryPointsAttr,
+                         ValueRange workload,
+                         SymbolTableCollection &symbolTable) {
+  auto entryPointAttrs = entryPointsAttr.getAsRange<SymbolRefAttr>();
+  if (entryPointAttrs.empty()) {
+    return op->emitOpError() << "at least one entry point must be defined";
+  }
+  for (auto entryPointAttr : entryPointAttrs) {
+    auto exportOp =
+        symbolTable.lookupNearestSymbolFrom<IREE::Stream::ExecutableExportOp>(
+            op, entryPointAttr);
+    if (!exportOp) {
+      // TODO(benvanik): there are a lot of tests that are assuming this is not
+      // verified. We'll need to go add dummy executables for all of them. Today
+      // we just bail on the verifier if the symbol isn't found.
+      //
+      // Should be:
+      //   return op->emitOpError() << "undefined entry point: " <<
+      //   entry_point();
+      return success();
+    }
+
+    // Verify that the workload parameters captured match the target export.
+    if (failed(verifyDispatchWorkload(op, exportOp, workload))) {
+      return failure();
+    }
+
+    // TODO(benvanik): verify that the target function has matching operands.
+  }
+  return success();
+}
+
+LogicalResult
+TensorDispatchOp::verifySymbolUses(SymbolTableCollection &symbolTable) {
+  return verifyDispatchSymbolUses(getOperation(), getEntryPointsAttr(),
+                                  getWorkload(), symbolTable);
+}
+
+std::pair<unsigned, unsigned>
+TensorDispatchOp::getTiedOperandsIndexAndLength() {
+  return getODSOperandIndexAndLength(1); // $mixed_operands
+}
+
 //===----------------------------------------------------------------------===//
 // stream.async.alloca
 //===----------------------------------------------------------------------===//
@@ -2220,34 +2675,8 @@ LogicalResult AsyncDispatchOp::verify() {

 LogicalResult
 AsyncDispatchOp::verifySymbolUses(SymbolTableCollection &symbolTable) {
-  Operation *op = getOperation();
-  auto entryPointRefs = getEntryPointRefs();
-  if (entryPointRefs.empty()) {
-    return emitOpError() << "at least one entry point must be defined";
-  }
-  for (auto entryPointAttr : entryPointRefs) {
-    auto exportOp =
-        symbolTable.lookupNearestSymbolFrom<IREE::Stream::ExecutableExportOp>(
-            op, entryPointAttr);
-    if (!exportOp) {
-      // TODO(benvanik): there are a lot of tests that are assuming this is not
-      // verified. We'll need to go add dummy executables for all of them. Today
-      // we just bail on the verifier if the symbol isn't found.
-      //
-      // Should be:
-      //   return op->emitOpError() << "undefined entry point: " <<
-      //   entry_point();
-      return success();
-    }
-
-    // Verify that the workload parameters captured match the target export.
-    if (failed(verifyDispatchWorkload(op, exportOp, getWorkload()))) {
-      return failure();
-    }
-
-    // TODO(benvanik): verify that the target function has matching operands.
-  }
-  return success();
+  return verifyDispatchSymbolUses(getOperation(), getEntryPointsAttr(),
+                                  getWorkload(), symbolTable);
 }

 std::pair<unsigned, unsigned> AsyncDispatchOp::getTiedOperandsIndexAndLength() {
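To make the result grammar above concrete, a hedged sketch of the accepted result forms (value and type names illustrative, not from the patch):

    -> !stream.resource<*>{%size}                                       // type only, unused encoding
    -> tensor<4xf32> in !stream.resource<*>{%size}                      // encoding in a new resource
    -> tensor<?xf32>{%dim} in %operand                                  // encoding, tied to an operand
    -> tensor<?xf32>{%dim} in %operand as !stream.resource<*>{%size}    // encoding, tied, retyped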
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.td b/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.td
index bfd755a28279..8ed4bca948fa 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.td
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.td
@@ -1739,6 +1739,80 @@ def Stream_TensorTraceOp : Stream_Op<"tensor.trace", [
   let hasVerifier = 1;
 }

+def Stream_TensorDispatchOp : Stream_Op<"tensor.dispatch", [
+  AttrSizedOperandSegments,
+  DeclareOpInterfaceMethods<SymbolUserOpInterface>,
+  Stream_AffinityOp,
+  Stream_TensorPhaseOp,
+  Stream_StreamableOp,
+  Util_SizeAwareOp,
+  DeclareOpInterfaceMethods<Util_TiedOpInterface, [
+    "getTiedOperandsIndexAndLength",
+  ]>,
+]> {
+  let summary = [{dispatches a parallelized grid of work}];
+  let description = [{
+    Calls the specified entry point function once for each element in the
+    specified workgroup count. Each workgroup has access to the same operands
+    and results and is able to load/store at will.
+  }];
+
+  let arguments = (ins
+    Variadic<Stream_Dim>:$workload,
+    SymbolRefArrayAttr:$entry_points,
+    Variadic<AnyTypeOf<[Stream_AnyStreamResource, AnyType]>>:$mixed_operands,
+    Variadic<Stream_Size>:$operand_sizes,
+    TypeArrayAttr:$operand_encodings,
+    Stream_ShapeDynamicDims:$operand_encoding_dims,
+    Variadic<Stream_Size>:$result_sizes,
+    TypeArrayAttr:$result_encodings,
+    Stream_ShapeDynamicDims:$result_encoding_dims,
+    OptionalAttr<Util_TiedOpStorageAttr>:$tied_operands,
+    OptionalAttr<Stream_AffinityAttr>:$affinity
+  );
+  let results = (outs
+    Variadic<Stream_AnyStreamResource>:$results
+  );
+
+  let assemblyFormat = [{
+    (`on` `(` $affinity^ `)`)?
+    custom<DispatchEntryPoints>($entry_points)
+    (`[` $workload^ `]`)? ``
+    `(` $mixed_operands `)`
+    attr-dict `:`
+    custom<EncodedShapedFunctionType>(
+        ref($mixed_operands),
+        type($mixed_operands), $operand_sizes,
+        $operand_encodings, $operand_encoding_dims,
+        type($results), $result_sizes,
+        $result_encodings, $result_encoding_dims,
+        $tied_operands)
+  }];
+
+  let extraClassDeclaration = [{
+    auto getEntryPointRefs() {
+      return getEntryPoints().getAsRange<SymbolRefAttr>();
+    }
+    void forEachEntryPointAttr(std::function<void(SymbolRefAttr)> fn) {
+      for (auto entryPointAttr : getEntryPointRefs()) fn(entryPointAttr);
+    }
+
+    Value getOperandSize(unsigned idx) {
+      return IREE::Util::findValueSizeInList(idx, getOperands(), getOperandSizes());
+    }
+    Value getResultSize(unsigned idx) {
+      return IREE::Util::findValueSizeInList(idx, getResults(), getResultSizes());
+    }
+  }];
+
+  let hasVerifier = 1;
+
+  let hasCanonicalizer = 1;
+}
+
 } // OpGroupTensorOps

 //===----------------------------------------------------------------------===//
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/test/tensor_ops.mlir b/compiler/src/iree/compiler/Dialect/Stream/IR/test/tensor_ops.mlir
index a224e4ade0a3..c6d20e13acca 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/test/tensor_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/test/tensor_ops.mlir
@@ -151,3 +151,18 @@ util.func private @tensorTrace(%tensor0: !stream.resource<staging>, %tensor0_siz
   ]
   util.return
 }
+
+// -----
+
+// CHECK-LABEL: @tensorDispatch
+util.func private @tensorDispatch(%arg0: !stream.resource<*>, %arg1: index, %arg2: index) -> !stream.resource<*> {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %c2 = arith.constant 2 : index
+  %c3 = arith.constant 3 : index
+  %c4 = arith.constant 4 : index
+  // CHECK: = stream.tensor.dispatch @executable::@dispatch[%c1, %c2, %c3](%arg0, %c4) :
+  // CHECK-SAME: (tensor<4x?xf32>{%arg2} in !stream.resource<*>{%arg1}, index) -> tensor<?x4xf32>{%arg2} in %arg0{%arg1}
+  %0 = stream.tensor.dispatch @executable::@dispatch[%c1, %c2, %c3](%arg0, %c4) : (tensor<4x?xf32>{%arg2} in !stream.resource<*>{%arg1}, index) -> tensor<?x4xf32>{%arg2} in %arg0{%arg1}
+  util.return %0 : !stream.resource<*>
+}
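The encoding/dim pairing exercised above is what verifyOpDynamicDimsRange enforces; a sketch of IR it would reject (names illustrative): tensor<?x?xf32> declares two dynamic dims but only one value is attached.

    // error: 'stream.tensor.dispatch' op type set has 2 dynamic dimensions but only 1 dimension values are attached
    %0 = stream.tensor.dispatch @ex::@entry(%arg0) : (tensor<?x?xf32>{%dim0} in !stream.resource<*>{%size}) -> !stream.resource<*>{%size}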
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/EncodeTensors.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/EncodeTensors.cpp
index d831a44b459c..c3753aab0dfe 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/EncodeTensors.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/EncodeTensors.cpp
@@ -589,6 +589,41 @@ struct EncodeTensorStoreOp
   }
 };

+//===----------------------------------------------------------------------===//
+// stream.tensor.dispatch
+//===----------------------------------------------------------------------===//
+
+struct EncodeTensorDispatchOp
+    : public OpRewritePattern<IREE::Stream::TensorDispatchOp> {
+  using OpRewritePattern::OpRewritePattern;
+  LogicalResult matchAndRewrite(IREE::Stream::TensorDispatchOp op,
+                                PatternRewriter &rewriter) const override {
+    // Strip off the tensor encoding information - it's not used at all here.
+    // If we changed the tensor dispatch op to accept indices and lengths for
+    // offsetting we would need to account for that here but today we require
+    // that to happen on slices/updates instead.
+    Value zeroOffset = rewriter.create<arith::ConstantIndexOp>(op.getLoc(), 0);
+    SmallVector<Value> operandOffsets;
+    SmallVector<Value> operandEnds;
+    SmallVector<Value> operandLengths;
+    auto operandSizes = op.getOperandSizes();
+    for (auto operand : op.getMixedOperands()) {
+      if (isa<IREE::Stream::ResourceType>(operand.getType())) {
+        operandOffsets.push_back(zeroOffset);
+        operandEnds.push_back(operandSizes.front());
+        operandLengths.push_back(operandSizes.front());
+        operandSizes = operandSizes.drop_front(1);
+      }
+    }
+    rewriter.replaceOpWithNewOp<IREE::Stream::AsyncDispatchOp>(
+        op, op.getResultTypes(), op.getWorkload(), op.getEntryPointsAttr(),
+        op.getMixedOperands(), op.getOperandSizes(), operandOffsets,
+        operandEnds, operandLengths, op.getResultSizes(),
+        op.getTiedOperandsAttr(), op.getAffinityAttr());
+    return success();
+  }
+};
+
 //===----------------------------------------------------------------------===//
 // --iree-stream-encode-host-tensors
 //===----------------------------------------------------------------------===//
@@ -602,8 +637,8 @@ struct EncodeHostTensorsPass
         EncodeTensorImportOp, EncodeTensorExportOp, EncodeTensorSizeOfOp,
         EncodeTensorEmptyOp, EncodeTensorConstantOp, EncodeTensorSplatOp,
         EncodeTensorCloneOp, EncodeTensorSliceOp, EncodeTensorFillOp,
-        EncodeTensorUpdateOp, EncodeTensorLoadOp, EncodeTensorStoreOp>(
-        &getContext());
+        EncodeTensorUpdateOp, EncodeTensorLoadOp, EncodeTensorStoreOp,
+        EncodeTensorDispatchOp>(&getContext());
     FrozenRewritePatternSet frozenPatterns(std::move(patterns));
     if (failed(applyPatternsGreedily(getOperation(), frozenPatterns))) {
       return signalPassFailure();
     }
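A sketch of the lowering EncodeTensorDispatchOp performs: once encodings have been resolved, the op degrades to stream.async.dispatch over whole-resource ranges, reusing each resource size as both end and length (async.dispatch syntax taken from the CHECK lines this patch retires; types illustrative):

    %0 = stream.tensor.dispatch @ex::@entry(%arg0) : (tensor<4xf32> in !stream.resource<*>{%size}) -> tensor<4xf32> in !stream.resource<*>{%size}
    // becomes:
    %c0 = arith.constant 0 : index
    %0 = stream.async.dispatch @ex::@entry(%arg0[%c0 to %size for %size]) : (!stream.resource<*>{%size}) -> !stream.resource<*>{%size}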
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/BUILD.bazel b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/BUILD.bazel
index c379ed2dac47..138ba0be6689 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/BUILD.bazel
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/BUILD.bazel
@@ -28,8 +28,8 @@ iree_lit_test_suite(
            "encode_device_tensors.mlir",
            "encode_device_tensors_packing.mlir",
            "encode_host_tensors.mlir",
+            "encode_host_tensors_encoding.mlir",
            "encode_host_tensors_packing.mlir",
-            "encode_host_tensors_packing_i1_attr.mlir",
            "encode_host_tensors_packing_i1_experimental_clopt.mlir",
            "fold_globals.mlir",
            "fold_uniform_operands.mlir",
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/CMakeLists.txt b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/CMakeLists.txt
index 48e6ccf5b3f8..4c4cb93d80ef 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/CMakeLists.txt
@@ -26,8 +26,8 @@ iree_lit_test_suite(
    "encode_device_tensors.mlir"
    "encode_device_tensors_packing.mlir"
    "encode_host_tensors.mlir"
+    "encode_host_tensors_encoding.mlir"
    "encode_host_tensors_packing.mlir"
-    "encode_host_tensors_packing_i1_attr.mlir"
    "encode_host_tensors_packing_i1_experimental_clopt.mlir"
    "fold_globals.mlir"
    "fold_uniform_operands.mlir"
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/convert_to_stream.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/convert_to_stream.mlir
index 8815f6103f78..9e6600d2dcf4 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/convert_to_stream.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/convert_to_stream.mlir
@@ -38,7 +38,7 @@ util.func public @simple_mul(%arg0: !hal.buffer_view) -> !hal.buffer_view attrib
   %c1 = arith.constant 1 : index
   %c2 = arith.constant 2 : index
   // CHECK: %[[RET0_SIZE:.+]] = stream.tensor.sizeof tensor<?xf32>{%[[DIM0]]} : index
-  // CHECK: %[[RET0:.+]] = stream.async.dispatch @executable::@dispatch[%c2, %c1, %c1](%[[ARG0_T]][%c0 to %[[ARG0_SIZE]] for %[[ARG0_SIZE]]]) : (!stream.resource<*>{%[[ARG0_SIZE]]}) -> !stream.resource<*>{%[[RET0_SIZE]]}
+  // CHECK: %[[RET0:.+]] = stream.tensor.dispatch @executable::@dispatch[%c2, %c1, %c1](%[[ARG0_T]]) : (tensor<?xf32>{%[[DIM0]]} in !stream.resource<*>{%[[ARG0_SIZE]]}) -> tensor<?xf32>{%[[DIM0]]} in !stream.resource<*>{%[[RET0_SIZE]]}
   %1 = flow.dispatch @executable::@dispatch[%c2, %c1, %c1](%0) : (tensor<?xf32>{%dim0}) -> tensor<?xf32>{%dim0}

   // CHECK: %[[RET0_T:.+]] = stream.async.transfer %[[RET0]] : !stream.resource<*>{%[[RET0_SIZE]]} -> !stream.resource<external>{%[[RET0_SIZE]]}
@@ -136,7 +136,7 @@ util.func public @while_test() {
   // CHECK: ^bb1(%[[BB1_ARG:.+]]: !stream.resource<*>, %[[BB1_ARG_SIZE:.+]]: index):
   ^bb1(%1: tensor<i32>):
     // CHECK: %[[COND_SIZE:.+]] = stream.tensor.sizeof tensor<i1> : index
-    // CHECK: %[[COND_RESOURCE:.+]] = stream.async.dispatch @while_test_dispatch_0::@dispatch[%c1, %c1, %c1](%[[BB1_ARG]][%c0{{[_0-9]*}} to %[[BB1_ARG_SIZE]] for %[[BB1_ARG_SIZE]]]) : (!stream.resource<*>{%[[BB1_ARG_SIZE]]}) -> !stream.resource<*>{%[[COND_SIZE]]}
+    // CHECK: %[[COND_RESOURCE:.+]] = stream.tensor.dispatch @while_test_dispatch_0::@dispatch[%c1, %c1, %c1](%[[BB1_ARG]]) : (tensor<i32> in !stream.resource<*>{%[[BB1_ARG_SIZE]]}) -> tensor<i1> in !stream.resource<*>{%[[COND_SIZE]]}
    %2 = flow.dispatch @while_test_dispatch_0::@dispatch[%c1, %c1, %c1](%1) : (tensor<i32>) -> tensor<i1>

    // CHECK: %[[READBACK:.+]] = stream.async.transfer %[[COND_RESOURCE]] : !stream.resource<*>{%[[COND_SIZE]]} -> !stream.resource<staging>{%[[COND_SIZE]]}
@@ -149,7 +149,7 @@ util.func public @while_test() {
   // CHECK: ^bb2:
   ^bb2:
     // CHECK: %[[BB2_VAR_SIZE:.+]] = stream.tensor.sizeof tensor<i32> : index
-    // CHECK: %[[BB2_VAR:.+]] = stream.async.dispatch @while_test_dispatch_1::@dispatch[%c1, %c1, %c1](%[[BB1_ARG]][%c0{{[_0-9]*}} to %[[BB1_ARG_SIZE]] for %[[BB1_ARG_SIZE]]]) : (!stream.resource<*>{%[[BB1_ARG_SIZE]]}) -> !stream.resource<*>{%[[BB2_VAR_SIZE]]}
+    // CHECK: %[[BB2_VAR:.+]] = stream.tensor.dispatch @while_test_dispatch_1::@dispatch[%c1, %c1, %c1](%[[BB1_ARG]]) : (tensor<i32> in !stream.resource<*>{%[[BB1_ARG_SIZE]]}) -> tensor<i32> in !stream.resource<*>{%[[BB2_VAR_SIZE]]}
    %4 = flow.dispatch @while_test_dispatch_1::@dispatch[%c1, %c1, %c1](%1) : (tensor<i32>) -> tensor<i32>

    // CHECK: cf.br ^bb1(%[[BB2_VAR]], %[[BB2_VAR_SIZE]] : !stream.resource<*>, index)
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_host_tensors.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_host_tensors.mlir
index ffe248aee796..9a97b9ec8323 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_host_tensors.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_host_tensors.mlir
@@ -4,317 +4,29 @@ util.func public @denseTensorSizeOf(%arg0: index) -> index {
   // CHECK: %[[STATIC_SIZE:.+]] = arith.constant 20 : index
   // CHECK: %[[DYNAMIC_SIZE:.+]] = arith.muli %arg0, %[[STATIC_SIZE]] : index
-  %0 = stream.tensor.sizeof tensor<?x5xf32>{%arg0} : index
+  %dynamic_size = stream.tensor.sizeof tensor<?x5xf32>{%arg0} : index
   // CHECK: util.return %[[DYNAMIC_SIZE]]
-  util.return %0 : index
+  util.return %dynamic_size : index
 }

 // -----

 // CHECK-LABEL: @denseTensorSizeOfEmpty
 util.func public @denseTensorSizeOfEmpty(%arg0: index) -> index {
-  // CHECK: %[[ZERO:.+]] = arith.constant 0 : index
-  %0 = stream.tensor.sizeof tensor<?x0xf32>{%arg0} : index
-  // CHECK: util.return %[[ZERO]]
-  util.return %0 : index
+  // CHECK: %[[ZERO_SIZE:.+]] = arith.constant 0 : index
+  %zero_size = stream.tensor.sizeof tensor<?x0xf32>{%arg0} : index
+  // CHECK: util.return %[[ZERO_SIZE]]
+  util.return %zero_size : index
 }

 // -----

-#encoding_layout = #iree_cpu.vmvx_encoding_layout<configuration = {encoding_info = {innerDimsPos = [0, 1], innerTileSizes = [4, 16], outerDimsPerm = [0, 1]}}>
-#encoding = #iree_encoding.encoding<operand_index = 0 : index, op_type = matmul, element_types = [f32, f32, f32], layouts = [#encoding_layout]>
-util.func public @sizeof_lhs_encoding_dynamic_using_layouts(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_lhs_encoding_dynamic_using_layouts
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivsi %arg0, %[[C4]]
-// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]]
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivsi %arg1, %[[C16]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
-// -----
-
-#map = affine_map<(d0, d1, d2) -> (d0, d2)>
-#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
-#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
-#encoding = #iree_encoding.encoding<operand_index = 0 : index, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
-util.func public @sizeof_lhs_encoding_dynamic(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_lhs_encoding_dynamic
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivui %arg0, %[[C4]]
-// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]]
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C16]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
-// -----
-
-#encoding_layout = #iree_cpu.vmvx_encoding_layout<configuration = {encoding_info = {innerDimsPos = [0, 1], innerTileSizes = [4, 16], outerDimsPerm = [0, 1]}}>
-#encoding = #iree_encoding.encoding<operand_index = 0 : index, op_type = matmul, element_types = [f32, f32, f32], layouts = [#encoding_layout]>
-util.func public @sizeof_lhs_encoding_partially_dynamic_using_layouts(%arg0: index) -> index {
-  %0 = stream.tensor.sizeof tensor<10x?xf32, #encoding>{%arg0} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_lhs_encoding_partially_dynamic_using_layouts
-// CHECK-DAG: %[[C48:.+]] = arith.constant 48 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivsi %arg0, %[[C16]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D1]], %[[C48]]
-// CHECK: return %[[T0]]
-
-// -----
-
-#map = affine_map<(d0, d1, d2) -> (d0, d2)>
-#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
-#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
-#encoding = #iree_encoding.encoding<operand_index = 0 : index, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
-util.func public @sizeof_lhs_encoding_partially_dynamic(%arg0: index) -> index {
-  %0 = stream.tensor.sizeof tensor<10x?xf32, #encoding>{%arg0} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_lhs_encoding_partially_dynamic
-// CHECK-DAG: %[[C48:.+]] = arith.constant 48 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg0, %[[C16]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D1]], %[[C48]]
-// CHECK: return %[[T0]]
-
-// -----
-
-// In GEMM, the RHS has the `(M, N, K) -> (K, N)` layout. The tile sizes
-// (i.e., [8, 16]) are for [dim_1, dim_0] in the encoding_info, where dim_1 is
-// N-dimension and dim_0 is K-dimension.
-#encoding_layout = #iree_cpu.vmvx_encoding_layout<configuration = {encoding_info = {innerDimsPos = [1, 0], innerTileSizes = [8, 16], outerDimsPerm = [1, 0]}}>
-#encoding = #iree_encoding.encoding<operand_index = 1 : index, op_type = matmul, element_types = [f32, f32, f32], layouts = [#encoding_layout]>
-util.func public @sizeof_rhs_encoding_dynamic_using_layouts(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_rhs_encoding_dynamic_using_layouts
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivsi %arg1, %[[C8]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C8]]
-// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivsi %arg0, %[[C16]]
-// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C16]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
-// -----
-
-#map = affine_map<(d0, d1, d2) -> (d0, d2)>
-#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
-#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
-#encoding = #iree_encoding.encoding<operand_index = 1 : index, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
-util.func public @sizeof_rhs_encoding_dynamic(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_rhs_encoding_dynamic
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C8]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C8]]
-// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivui %arg0, %[[C16]]
-// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C16]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
-// -----
-
-#encoding_layout = #iree_cpu.vmvx_encoding_layout<configuration = {encoding_info = {innerDimsPos = [0, 1], innerTileSizes = [4, 8], outerDimsPerm = [0, 1]}}>
-#encoding = #iree_encoding.encoding<operand_index = 2 : index, op_type = matmul, element_types = [f32, f32, f32], layouts = [#encoding_layout]>
-util.func public @sizeof_result_encoding_dynamic_using_layouts(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_result_encoding_dynamic_using_layouts
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index
-// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivsi %arg0, %[[C4]]
-// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]]
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivsi %arg1, %[[C8]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C8]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
-// -----
-
-#map = affine_map<(d0, d1, d2) -> (d0, d2)>
-#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
-#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
-#encoding = #iree_encoding.encoding<operand_index = 2 : index, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
-util.func public @sizeof_result_encoding_dynamic(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_result_encoding_dynamic
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index
-// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivui %arg0, %[[C4]]
-// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]]
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C8]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C8]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
-// -----
-
-// The layout is as the same as the the matmul LHS layout because it broadcasts
-// across the batch dimension. The test is preserved for having the same test
-// suite of non-layouts style encoding. I.e., this is the resolved layout
-// version of the below sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic
-// test.
-#encoding_layout = #iree_cpu.vmvx_encoding_layout<configuration = {encoding_info = {innerDimsPos = [0, 1], innerTileSizes = [4, 16], outerDimsPerm = [0, 1]}}>
-#encoding = #iree_encoding.encoding<operand_index = 0 : index, op_type = matmul, element_types = [f32, f32, f32], layouts = [#encoding_layout]>
-util.func public @sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic_using_layouts(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic_using_layouts
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivsi %arg0, %[[C4]]
-// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]]
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivsi %arg1, %[[C16]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
-// -----
-
-#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
-#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
-#map2 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
-#map3 = affine_map<(d0, d1, d2) -> (d1, d2)>
-#encoding = #iree_encoding.encoding<operand_index = 0 : index, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [[#map, #map3], #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
-util.func public @sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivui %arg0, %[[C4]]
-// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]]
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C16]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
-// -----
-
-// The M-dimension inner tile is not present because it broadcasts across the
-// M-dimension. We do not need to pack the M-dimension in this case.
-#encoding_layout = #iree_cpu.vmvx_encoding_layout<configuration = {encoding_info = {innerDimsPos = [1], innerTileSizes = [16], outerDimsPerm = [0, 1]}}>
-#encoding = #iree_encoding.encoding<operand_index = 0 : index, op_type = matmul, element_types = [f32, f32, f32], layouts = [#encoding_layout]>
-util.func public @sizeof_lhs_encoding_with_bcast_across_m_dim_dynamic_using_layouts(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_lhs_encoding_with_bcast_across_m_dim_dynamic_using_layouts
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivsi %arg1, %[[C16]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
-//
-// Multiplied by 4 because f32 has 4 bytes.
-//
-// CHECK: %[[T0:.+]] = arith.muli %arg0, %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
-// -----
-
-#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
-#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
-#map2 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
-#map3 = affine_map<(d0, d1, d2) -> (d0, d2)>
-#encoding = #iree_encoding.encoding<operand_index = 0 : index, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [[#map, #map3], #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
-util.func public @sizeof_lhs_encoding_with_bcast_across_m_dim_dynamic(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_lhs_encoding_with_bcast_across_m_dim_dynamic
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C16]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
-//
-// Multiplied by 4 because f32 has 4 bytes.
-//
-// CHECK: %[[T0:.+]] = arith.muli %arg0, %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
-// -----
-
-#encoding_layout_0 = #iree_cpu.cpu_encoding_layout<configuration = {encoding_info = {innerDimsPos = [0, 1], innerTileSizes = [4, 8], outerDimsPerm = [0, 1]}}>
-#encoding_layout_1 = #iree_cpu.vmvx_encoding_layout<configuration = {encoding_info = {innerDimsPos = [0, 1], innerTileSizes = [2, 16], outerDimsPerm = [0, 1]}}>
-#encoding = #iree_encoding.encoding<operand_index = 0 : index, op_type = matmul, element_types = [f32, f32, f32], layouts = [#encoding_layout_0, #encoding_layout_1]>
-util.func public @sizeof_multi_encoding_layouts(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_multi_encoding_layouts
-// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-//
-// Check for the first layout.
-//
-// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivsi %arg0, %[[C4]]
-// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]]
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivsi %arg1, %[[C8]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C8]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
-// CHECK: %[[SIZE0:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-//
-// Check for the first layout.
-//
-// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivsi %arg0, %[[C2]]
-// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C2]]
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivsi %arg1, %[[C16]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
-// CHECK: %[[T1:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
-// CHECK: %[[SIZE1:.+]] = arith.muli %[[T1]], %[[PAD_D1]]
-//
-// Return the max value.
tensor{%arg1} in !stream.resource<*>{%arg2} - // CHECK: util.return %[[RET]] - util.return %0 : !stream.resource<*> + // CHECK: %[[RESULT:.+]] = stream.async.splat %[[PATTERN]] : i8 -> !stream.resource<*>{%arg2} + %result = stream.tensor.splat %arg0 : i1 -> tensor{%arg1} in !stream.resource<*>{%arg2} + // CHECK: util.return %[[RESULT]] + util.return %result : !stream.resource<*> } // ----- @@ -368,10 +80,10 @@ util.func public @denseTensorSplatI1(%arg0: i1, %arg1: index, %arg2: index) -> ! // CHECK-LABEL: @denseTensorSplatBF16 util.func public @denseTensorSplatBF16(%arg0: bf16, %arg1: index, %arg2: index) -> !stream.resource<*> { // CHECK: %[[PATTERN:.+]] = arith.bitcast %arg0 : bf16 to i16 - // CHECK: %[[RET:.+]] = stream.async.splat %[[PATTERN]] : i16 -> !stream.resource<*>{%arg2} - %0 = stream.tensor.splat %arg0 : bf16 -> tensor{%arg1} in !stream.resource<*>{%arg2} - // CHECK: util.return %[[RET]] - util.return %0 : !stream.resource<*> + // CHECK: %[[RESULT:.+]] = stream.async.splat %[[PATTERN]] : i16 -> !stream.resource<*>{%arg2} + %result = stream.tensor.splat %arg0 : bf16 -> tensor{%arg1} in !stream.resource<*>{%arg2} + // CHECK: util.return %[[RESULT]] + util.return %result : !stream.resource<*> } // ----- @@ -379,20 +91,20 @@ util.func public @denseTensorSplatBF16(%arg0: bf16, %arg1: index, %arg2: index) // CHECK-LABEL: @denseTensorSplatF32 util.func public @denseTensorSplatF32(%arg0: f32, %arg1: index, %arg2: index) -> !stream.resource<*> { // CHECK: %[[PATTERN:.+]] = arith.bitcast %arg0 : f32 to i32 - // CHECK: %[[RET:.+]] = stream.async.splat %[[PATTERN]] : i32 -> !stream.resource<*>{%arg2} - %0 = stream.tensor.splat %arg0 : f32 -> tensor{%arg1} in !stream.resource<*>{%arg2} - // CHECK: util.return %[[RET]] - util.return %0 : !stream.resource<*> + // CHECK: %[[RESULT:.+]] = stream.async.splat %[[PATTERN]] : i32 -> !stream.resource<*>{%arg2} + %result = stream.tensor.splat %arg0 : f32 -> tensor{%arg1} in !stream.resource<*>{%arg2} + // CHECK: util.return %[[RESULT]] + util.return %result : !stream.resource<*> } // ----- // CHECK-LABEL: @denseTensorSplatI64 util.func public @denseTensorSplatI64(%arg0: i64, %arg1: index, %arg2: index) -> !stream.resource<*> { - // CHECK: %[[RET:.+]] = stream.async.splat %arg0 : i64 -> !stream.resource<*>{%arg2} - %0 = stream.tensor.splat %arg0 : i64 -> tensor{%arg1} in !stream.resource<*>{%arg2} - // CHECK: util.return %[[RET]] - util.return %0 : !stream.resource<*> + // CHECK: %[[RESULT:.+]] = stream.async.splat %arg0 : i64 -> !stream.resource<*>{%arg2} + %result = stream.tensor.splat %arg0 : i64 -> tensor{%arg1} in !stream.resource<*>{%arg2} + // CHECK: util.return %[[RESULT]] + util.return %result : !stream.resource<*> } // ----- @@ -400,25 +112,25 @@ util.func public @denseTensorSplatI64(%arg0: i64, %arg1: index, %arg2: index) -> // CHECK-LABEL: @denseTensorSplatConstantComplexF32 util.func public @denseTensorSplatConstantComplexF32(%arg0: !stream.resource<*>) -> (!stream.resource<*>) { %cst = complex.constant [3.000000e+00 : f32, 1.000000e+01 : f32] : complex - %0 = stream.tensor.sizeof tensor<6xcomplex> : index + %result_size = stream.tensor.sizeof tensor<6xcomplex> : index // CHECK: %[[I64NUMBER:.+]] = complex.constant [3.000000e+00 : f32, 1.000000e+01 : f32] : complex // CHECK: %[[BITCAST:.+]] = complex.bitcast %[[I64NUMBER]] : complex to i64 - // CHECK: %[[SPLAT_RES:.+]] = stream.async.splat %[[BITCAST]] - %1 = stream.tensor.splat %cst : complex -> tensor<6xcomplex> in !stream.resource<*>{%0} - // CHECK: util.return %[[SPLAT_RES]] - 
util.return %1 : !stream.resource<*>
+  // CHECK: %[[RESULT:.+]] = stream.async.splat %[[BITCAST]]
+  %result = stream.tensor.splat %cst : complex -> tensor<6xcomplex> in !stream.resource<*>{%result_size}
+  // CHECK: util.return %[[RESULT]]
+  util.return %result : !stream.resource<*>
 }

 // -----

 // CHECK-LABEL: @denseTensorSplatDynamicComplexF32
 util.func public @denseTensorSplatDynamicComplexF32(%arg0: !stream.resource<*>, %arg1: complex) -> (!stream.resource<*>) {
-  %0 = stream.tensor.sizeof tensor<6xcomplex> : index
+  %result_size = stream.tensor.sizeof tensor<6xcomplex> : index
   // CHECK: %[[BITCAST:.+]] = complex.bitcast %arg1 : complex to i64
-  // CHECK: %[[SPLAT_RES:.+]] = stream.async.splat %[[BITCAST]]
-  %1 = stream.tensor.splat %arg1 : complex -> tensor<6xcomplex> in !stream.resource<*>{%0}
-  // CHECK: util.return %[[SPLAT_RES]]
-  util.return %1 : !stream.resource<*>
+  // CHECK: %[[RESULT:.+]] = stream.async.splat %[[BITCAST]]
+  %result = stream.tensor.splat %arg1 : complex -> tensor<6xcomplex> in !stream.resource<*>{%result_size}
+  // CHECK: util.return %[[RESULT]]
+  util.return %result : !stream.resource<*>
 }

 // -----

@@ -429,10 +141,12 @@ util.func public @denseTensorSplatDynamicComplexF32(%arg0: !stream.resource<*>,
 util.func public @denseTensorClone(%arg0: !stream.resource<*>, %arg1: index, %arg2: index, %arg3: f32) -> (!stream.resource<*>, !stream.resource<*>) {
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
-  // CHECK: %[[RET:.+]] = stream.async.clone %arg0 : !stream.resource<*>{%arg2} -> !stream.resource<*>{%arg2}
-  %0 = stream.tensor.clone %arg0 : tensor{%arg1} in !stream.resource<*>{%arg2} -> tensor{%arg1} in !stream.resource<*>{%arg2}
-  %1 = stream.tensor.fill %arg3, %0[%c0, %c0 for %c1, %c1] : f32 -> tensor{%arg1} in %0 as !stream.resource<*>{%arg2}
-  util.return %0, %1 : !stream.resource<*>, !stream.resource<*>
+  // CHECK: %[[CLONE:.+]] = stream.async.clone %arg0 : !stream.resource<*>{%arg2} -> !stream.resource<*>{%arg2}
+  %clone = stream.tensor.clone %arg0 : tensor{%arg1} in !stream.resource<*>{%arg2} -> tensor{%arg1} in !stream.resource<*>{%arg2}
+  // CHECK: %[[FILL:.+]] = stream.async.fill
+  %fill = stream.tensor.fill %arg3, %clone[%c0, %c0 for %c1, %c1] : f32 -> tensor{%arg1} in %clone as !stream.resource<*>{%arg2}
+  // CHECK: util.return %[[CLONE]], %[[FILL]]
+  util.return %clone, %fill : !stream.resource<*>, !stream.resource<*>
 }

 // -----

@@ -443,10 +157,10 @@ util.func public @denseTensorSlice(%arg0: !stream.resource<*>, %arg1: index, %ar
   %c1 = arith.constant 1 : index
   // CHECK: %[[OFFSET:.+]] = arith.constant 4 : index
   // CHECK: %[[END:.+]] = arith.addi %arg4, %[[OFFSET]] : index
-  // CHECK: %[[RET:.+]] = stream.async.slice %arg0[%[[OFFSET]] to %[[END]]] : !stream.resource<*>{%arg2} -> !stream.resource<*>{%arg4}
-  %0 = stream.tensor.slice %arg0[%c0, %c1 for %arg3, %c1] : tensor{%arg1} in !stream.resource<*>{%arg2} -> tensor{%arg3} in !stream.resource<*>{%arg4}
-  // CHECK: util.return %[[RET]]
-  util.return %0 : !stream.resource<*>
+  // CHECK: %[[RESULT:.+]] = stream.async.slice %arg0[%[[OFFSET]] to %[[END]]] : !stream.resource<*>{%arg2} -> !stream.resource<*>{%arg4}
+  %result = stream.tensor.slice %arg0[%c0, %c1 for %arg3, %c1] : tensor{%arg1} in !stream.resource<*>{%arg2} -> tensor{%arg3} in !stream.resource<*>{%arg4}
+  // CHECK: util.return %[[RESULT]]
+  util.return %result : !stream.resource<*>
 }

 // -----

@@ -458,10 +172,10 @@ util.func public @denseTensorFillF32(%arg0: f32, %arg1: !stream.resource<*>, %ar
   // CHECK-DAG: %[[OFFSET:.+]] = 
arith.constant 0 : index // CHECK-DAG: %[[LENGTH:.+]] = arith.constant 20 : index // CHECK-DAG: %[[PATTERN:.+]] = arith.bitcast %arg0 : f32 to i32 - // CHECK: %[[RET:.+]] = stream.async.fill %[[PATTERN]], %arg1[%[[OFFSET]] to %[[LENGTH]] for %[[LENGTH]]] : i32 -> %arg1 as !stream.resource<*>{%arg3} - %0 = stream.tensor.fill %arg0, %arg1[%c0, %c0 for %c1, %c1] : f32 -> tensor{%arg2} in %arg1 as !stream.resource<*>{%arg3} - // CHECK: util.return %[[RET]] - util.return %0 : !stream.resource<*> + // CHECK: %[[RESULT:.+]] = stream.async.fill %[[PATTERN]], %arg1[%[[OFFSET]] to %[[LENGTH]] for %[[LENGTH]]] : i32 -> %arg1 as !stream.resource<*>{%arg3} + %result = stream.tensor.fill %arg0, %arg1[%c0, %c0 for %c1, %c1] : f32 -> tensor{%arg2} in %arg1 as !stream.resource<*>{%arg3} + // CHECK: util.return %[[RESULT]] + util.return %result : !stream.resource<*> } // ----- @@ -472,10 +186,10 @@ util.func public @denseTensorFillI64(%arg0: i64, %arg1: !stream.resource<*>, %ar %c1 = arith.constant 1 : index // CHECK-DAG: %[[OFFSET:.+]] = arith.constant 0 : index // CHECK-DAG: %[[LENGTH:.+]] = arith.constant 40 : index - // CHECK: %[[RET:.+]] = stream.async.fill %arg0, %arg1[%[[OFFSET]] to %[[LENGTH]] for %[[LENGTH]]] : i64 -> %arg1 as !stream.resource<*>{%arg3} - %0 = stream.tensor.fill %arg0, %arg1[%c0, %c0 for %c1, %c1] : i64 -> tensor{%arg2} in %arg1 as !stream.resource<*>{%arg3} - // CHECK: util.return %[[RET]] - util.return %0 : !stream.resource<*> + // CHECK: %[[RESULT:.+]] = stream.async.fill %arg0, %arg1[%[[OFFSET]] to %[[LENGTH]] for %[[LENGTH]]] : i64 -> %arg1 as !stream.resource<*>{%arg3} + %result = stream.tensor.fill %arg0, %arg1[%c0, %c0 for %c1, %c1] : i64 -> tensor{%arg2} in %arg1 as !stream.resource<*>{%arg3} + // CHECK: util.return %[[RESULT]] + util.return %result : !stream.resource<*> } // ----- @@ -487,10 +201,10 @@ util.func public @denseTensorFillF64(%arg0: f64, %arg1: !stream.resource<*>, %ar // CHECK-DAG: %[[OFFSET:.+]] = arith.constant 0 : index // CHECK-DAG: %[[LENGTH:.+]] = arith.constant 40 : index // CHECK-DAG: %[[PATTERN:.+]] = arith.bitcast %arg0 : f64 to i64 - // CHECK: %[[RET:.+]] = stream.async.fill %[[PATTERN]], %arg1[%[[OFFSET]] to %[[LENGTH]] for %[[LENGTH]]] : i64 -> %arg1 as !stream.resource<*>{%arg3} - %0 = stream.tensor.fill %arg0, %arg1[%c0, %c0 for %c1, %c1] : f64 -> tensor{%arg2} in %arg1 as !stream.resource<*>{%arg3} - // CHECK: util.return %[[RET]] - util.return %0 : !stream.resource<*> + // CHECK: %[[RESULT:.+]] = stream.async.fill %[[PATTERN]], %arg1[%[[OFFSET]] to %[[LENGTH]] for %[[LENGTH]]] : i64 -> %arg1 as !stream.resource<*>{%arg3} + %result = stream.tensor.fill %arg0, %arg1[%c0, %c0 for %c1, %c1] : f64 -> tensor{%arg2} in %arg1 as !stream.resource<*>{%arg3} + // CHECK: util.return %[[RESULT]] + util.return %result : !stream.resource<*> } // ----- @@ -500,10 +214,10 @@ util.func public @denseTensorUpdate(%arg0: !stream.resource<*>, %arg1: index, %a %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index // CHECK: %[[OFFSET:.+]] = arith.constant 0 : index - // CHECK: %[[RET:.+]] = stream.async.update %arg0, %arg2[%[[OFFSET]] to %arg1] : !stream.resource<*>{%arg1} -> %arg2 as !stream.resource<*>{%arg4} - %0 = stream.tensor.update %arg0, %arg2[%c0, %c0] : tensor<2x2xf32> in !stream.resource<*>{%arg1} -> tensor{%arg3} in %arg2 as !stream.resource<*>{%arg4} - // CHECK: util.return %[[RET]] - util.return %0 : !stream.resource<*> + // CHECK: %[[RESULT:.+]] = stream.async.update %arg0, %arg2[%[[OFFSET]] to %arg1] : !stream.resource<*>{%arg1} -> %arg2 as 
!stream.resource<*>{%arg4} + %result = stream.tensor.update %arg0, %arg2[%c0, %c0] : tensor<2x2xf32> in !stream.resource<*>{%arg1} -> tensor{%arg3} in %arg2 as !stream.resource<*>{%arg4} + // CHECK: util.return %[[RESULT]] + util.return %result : !stream.resource<*> } // ----- @@ -512,10 +226,10 @@ util.func public @denseTensorUpdate(%arg0: !stream.resource<*>, %arg1: index, %a util.func public @denseTensorLoad(%arg0: !stream.resource, %arg1: index, %arg2: index) -> f32 { %c0 = arith.constant 0 : index // CHECK: %[[OFFSET:.+]] = arith.constant 0 : index - // CHECK: %[[RET:.+]] = stream.async.load %arg0[%[[OFFSET]]] : !stream.resource{%arg2} -> f32 - %0 = stream.tensor.load %arg0[%c0] : tensor{%arg1} in !stream.resource{%arg2} -> f32 - // CHECK: util.return %[[RET]] - util.return %0 : f32 + // CHECK: %[[RESULT:.+]] = stream.async.load %arg0[%[[OFFSET]]] : !stream.resource{%arg2} -> f32 + %result = stream.tensor.load %arg0[%c0] : tensor{%arg1} in !stream.resource{%arg2} -> f32 + // CHECK: util.return %[[RESULT]] + util.return %result : f32 } // ----- @@ -524,10 +238,10 @@ util.func public @denseTensorLoad(%arg0: !stream.resource, %arg1: index util.func public @denseTensorLoadRank0(%arg0: !stream.resource, %arg1: index) -> f32 { %c0 = arith.constant 0 : index // CHECK: %[[OFFSET:.+]] = arith.constant 0 : index - // CHECK: %[[RET:.+]] = stream.async.load %arg0[%[[OFFSET]]] : !stream.resource{%arg1} -> f32 - %0 = stream.tensor.load %arg0 : tensor in !stream.resource{%arg1} -> f32 - // CHECK: util.return %[[RET]] - util.return %0 : f32 + // CHECK: %[[RESULT:.+]] = stream.async.load %arg0[%[[OFFSET]]] : !stream.resource{%arg1} -> f32 + %result = stream.tensor.load %arg0 : tensor in !stream.resource{%arg1} -> f32 + // CHECK: util.return %[[RESULT]] + util.return %result : f32 } // ----- @@ -536,10 +250,10 @@ util.func public @denseTensorLoadRank0(%arg0: !stream.resource, %arg1: util.func public @denseTensorStore(%arg0: !stream.resource, %arg1: index, %arg2: index, %arg3: f32) -> !stream.resource { %c0 = arith.constant 0 : index // CHECK: %[[OFFSET:.+]] = arith.constant 0 : index - // CHECK: %[[RET:.+]] = stream.async.store %arg3, %arg0[%[[OFFSET]]] : f32 -> %arg0 as !stream.resource{%arg2} - %0 = stream.tensor.store %arg3, %arg0[%c0] : f32 -> tensor{%arg1} in %arg0 as !stream.resource{%arg2} - // CHECK: util.return %[[RET]] - util.return %0 : !stream.resource + // CHECK: %[[RESULT:.+]] = stream.async.store %arg3, %arg0[%[[OFFSET]]] : f32 -> %arg0 as !stream.resource{%arg2} + %result = stream.tensor.store %arg3, %arg0[%c0] : f32 -> tensor{%arg1} in %arg0 as !stream.resource{%arg2} + // CHECK: util.return %[[RESULT]] + util.return %result : !stream.resource } // ----- @@ -548,8 +262,27 @@ util.func public @denseTensorStore(%arg0: !stream.resource, %arg1: inde util.func public @denseTensorStoreRank0(%arg0: !stream.resource, %arg1: index, %arg2: f32) -> !stream.resource { %c0 = arith.constant 0 : index // CHECK: %[[OFFSET:.+]] = arith.constant 0 : index - // CHECK: %[[RET:.+]] = stream.async.store %arg2, %arg0[%[[OFFSET]]] : f32 -> %arg0 as !stream.resource{%arg1} - %0 = stream.tensor.store %arg2, %arg0 : f32 -> tensor in %arg0 as !stream.resource{%arg1} - // CHECK: util.return %[[RET]] - util.return %0 : !stream.resource + // CHECK: %[[RESULT:.+]] = stream.async.store %arg2, %arg0[%[[OFFSET]]] : f32 -> %arg0 as !stream.resource{%arg1} + %result = stream.tensor.store %arg2, %arg0 : f32 -> tensor in %arg0 as !stream.resource{%arg1} + // CHECK: util.return %[[RESULT]] + util.return %result : 
!stream.resource +} + +// ----- + +// CHECK-LABEL: @denseTensorDispatch +// CHECK-SAME: (%[[RESOURCE0:.+]]: !stream.resource, %[[RESOURCE0_SIZE:[a-z0-9]+]]: index, %[[TENSOR0_DIM:[a-z0-9]+]]: index, +// CHECK-SAME: %[[RESOURCE1:.+]]: !stream.resource, %[[RESOURCE1_SIZE:[a-z0-9]+]]: index, %[[TENSOR1_DIM:[a-z0-9]+]]: index) +util.func public @denseTensorDispatch( + %resource0: !stream.resource, %resource0_size: index, %tensor0_dim: index, + %resource1: !stream.resource, %resource1_size: index, %tensor1_dim: index) -> (!stream.resource, !stream.resource) { + // CHECK: %[[ZERO:.+]] = arith.constant 0 + // CHECK: %[[RESULTS:.+]]:2 = stream.async.dispatch @ex::@entry + // CHECK-SAME: (%[[RESOURCE0]][%[[ZERO]] to %[[RESOURCE0_SIZE]] for %[[RESOURCE0_SIZE]]], + // CHECK-SAME: %[[RESOURCE1]][%[[ZERO]] to %[[RESOURCE1_SIZE]] for %[[RESOURCE1_SIZE]]]) + // CHECK-SAME: (!stream.resource{%[[RESOURCE0_SIZE]]}, !stream.resource{%[[RESOURCE1_SIZE]]}) -> + // CHECK-SAME: (!stream.resource{%[[RESOURCE1_SIZE]]}, %[[RESOURCE1]]{%[[RESOURCE1_SIZE]]}) + %results:2 = stream.tensor.dispatch @ex::@entry(%resource0, %resource1) : (tensor<4x?xf32>{%tensor0_dim} in !stream.resource{%resource0_size}, tensor{%tensor1_dim} in !stream.resource{%resource1_size}) -> (tensor{%tensor1_dim} in !stream.resource{%resource1_size}, tensor{%tensor1_dim} in %resource1{%resource1_size}) + // CHECK: util.return %[[RESULTS]]#0, %[[RESULTS]]#1 + util.return %results#0, %results#1 : !stream.resource, !stream.resource } diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_host_tensors_encoding.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_host_tensors_encoding.mlir new file mode 100644 index 000000000000..8d670ebd6d1c --- /dev/null +++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_host_tensors_encoding.mlir @@ -0,0 +1,307 @@ +// RUN: iree-opt --split-input-file --iree-stream-encode-host-tensors %s | FileCheck %s + +// CHECK-LABEL: @tensorSizeOfUnalignedPackedI1 +util.func public @tensorSizeOfUnalignedPackedI1() -> index { + // CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index + %0 = stream.tensor.sizeof tensor<12xi1, #iree_encoding.packed_storage> : index + // CHECK: return %[[C2]] : index + util.return %0 : index +} + +// ----- + +// CHECK-LABEL: @tensorSizeOfAlignedPackedI1 +util.func public @tensorSizeOfAlignedPackedI1() -> index { + // CHECK-DAG: %[[C3:.+]] = arith.constant 3 : index + %0 = stream.tensor.sizeof tensor<24xi1, #iree_encoding.packed_storage> : index + // CHECK: util.return %[[C3]] : index + util.return %0 : index +} + +// ----- + +#encoding_layout = #iree_cpu.vmvx_encoding_layout +#encoding = #iree_encoding.encoding +util.func public @sizeof_lhs_encoding_dynamic_using_layouts(%arg0: index, %arg1: index) -> index { + %0 = stream.tensor.sizeof tensor{%arg0, %arg1} : index + util.return %0 : index +} +// CHECK-LABEL: @sizeof_lhs_encoding_dynamic_using_layouts +// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index +// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index +// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivsi %arg0, %[[C4]] +// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]] +// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivsi %arg1, %[[C16]] +// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]] +// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]] +// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]] +// CHECK: return %[[T1]] + +// ----- + +#map = affine_map<(d0, d1, d2) -> (d0, d2)> +#map1 = affine_map<(d0, d1, d2) -> 
(d2, d1)>
+#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
+#encoding = #iree_encoding.encoding>
+util.func public @sizeof_lhs_encoding_dynamic(%arg0: index, %arg1: index) -> index {
+  %0 = stream.tensor.sizeof tensor{%arg0, %arg1} : index
+  util.return %0 : index
+}
+// CHECK-LABEL: @sizeof_lhs_encoding_dynamic
+// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
+// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
+// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivui %arg0, %[[C4]]
+// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]]
+// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C16]]
+// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
+// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
+// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
+// CHECK: return %[[T1]]
+
+// -----
+
+#encoding_layout = #iree_cpu.vmvx_encoding_layout
+#encoding = #iree_encoding.encoding
+util.func public @sizeof_lhs_encoding_partially_dynamic_using_layouts(%arg0: index) -> index {
+  %0 = stream.tensor.sizeof tensor<10x?xf32, #encoding>{%arg0} : index
+  util.return %0 : index
+}
+// CHECK-LABEL: @sizeof_lhs_encoding_partially_dynamic_using_layouts
+// CHECK-DAG: %[[C48:.+]] = arith.constant 48 : index
+// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
+// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivsi %arg0, %[[C16]]
+// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
+// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D1]], %[[C48]]
+// CHECK: return %[[T0]]
+
+// -----
+
+#map = affine_map<(d0, d1, d2) -> (d0, d2)>
+#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
+#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
+#encoding = #iree_encoding.encoding>
+util.func public @sizeof_lhs_encoding_partially_dynamic(%arg0: index) -> index {
+  %0 = stream.tensor.sizeof tensor<10x?xf32, #encoding>{%arg0} : index
+  util.return %0 : index
+}
+// CHECK-LABEL: @sizeof_lhs_encoding_partially_dynamic
+// CHECK-DAG: %[[C48:.+]] = arith.constant 48 : index
+// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
+// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg0, %[[C16]]
+// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
+// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D1]], %[[C48]]
+// CHECK: return %[[T0]]
+
+// -----
+
+// In GEMM, the RHS has the `(M, N, K) -> (K, N)` layout. The tile sizes
+// (i.e., [8, 16]) are for [dim_1, dim_0] in the encoding_info, where dim_1 is
+// the N-dimension and dim_0 is the K-dimension.
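+//
+// As a hypothetical worked example (values not checked by the tests below):
+// with %arg0 (K) = 10 and %arg1 (N) = 20, padding gives
+// ceildiv(10, 16) * 16 = 16 and ceildiv(20, 8) * 8 = 24, so the f32 buffer
+// size is 16 * 4 * 24 = 1536 bytes.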
+#encoding_layout = #iree_cpu.vmvx_encoding_layout +#encoding = #iree_encoding.encoding +util.func public @sizeof_rhs_encoding_dynamic_using_layouts(%arg0: index, %arg1: index) -> index { + %0 = stream.tensor.sizeof tensor{%arg0, %arg1} : index + util.return %0 : index +} +// CHECK-LABEL: @sizeof_rhs_encoding_dynamic_using_layouts +// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index +// CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index +// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index +// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivsi %arg1, %[[C8]] +// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C8]] +// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivsi %arg0, %[[C16]] +// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C16]] +// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]] +// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]] +// CHECK: return %[[T1]] + +// ----- + +#map = affine_map<(d0, d1, d2) -> (d0, d2)> +#map1 = affine_map<(d0, d1, d2) -> (d2, d1)> +#map2 = affine_map<(d0, d1, d2) -> (d0, d1)> +#encoding = #iree_encoding.encoding> +util.func public @sizeof_rhs_encoding_dynamic(%arg0: index, %arg1: index) -> index { + %0 = stream.tensor.sizeof tensor{%arg0, %arg1} : index + util.return %0 : index +} +// CHECK-LABEL: @sizeof_rhs_encoding_dynamic +// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index +// CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index +// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index +// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C8]] +// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C8]] +// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivui %arg0, %[[C16]] +// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C16]] +// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]] +// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]] +// CHECK: return %[[T1]] + +// ----- + +#encoding_layout = #iree_cpu.vmvx_encoding_layout +#encoding = #iree_encoding.encoding +util.func public @sizeof_result_encoding_dynamic_using_layouts(%arg0: index, %arg1: index) -> index { + %0 = stream.tensor.sizeof tensor{%arg0, %arg1} : index + util.return %0 : index +} +// CHECK-LABEL: @sizeof_result_encoding_dynamic_using_layouts +// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index +// CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index +// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivsi %arg0, %[[C4]] +// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]] +// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivsi %arg1, %[[C8]] +// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C8]] +// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]] +// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]] +// CHECK: return %[[T1]] + +// ----- + +#map = affine_map<(d0, d1, d2) -> (d0, d2)> +#map1 = affine_map<(d0, d1, d2) -> (d2, d1)> +#map2 = affine_map<(d0, d1, d2) -> (d0, d1)> +#encoding = #iree_encoding.encoding> +util.func public @sizeof_result_encoding_dynamic(%arg0: index, %arg1: index) -> index { + %0 = stream.tensor.sizeof tensor{%arg0, %arg1} : index + util.return %0 : index +} +// CHECK-LABEL: @sizeof_result_encoding_dynamic +// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index +// CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index +// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivui %arg0, %[[C4]] +// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]] +// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C8]] +// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C8]] +// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]] +// CHECK: 
%[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
+// CHECK: return %[[T1]]
+
+// -----
+
+// The layout is the same as the matmul LHS layout because it broadcasts
+// across the batch dimension. The test is preserved to keep parity with the
+// test suite for the non-layouts style of encoding; i.e., this is the
+// resolved-layout version of the
+// sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic test below.
+#encoding_layout = #iree_cpu.vmvx_encoding_layout
+#encoding = #iree_encoding.encoding
+util.func public @sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic_using_layouts(%arg0: index, %arg1: index) -> index {
+  %0 = stream.tensor.sizeof tensor{%arg0, %arg1} : index
+  util.return %0 : index
+}
+// CHECK-LABEL: @sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic_using_layouts
+// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
+// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
+// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivsi %arg0, %[[C4]]
+// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]]
+// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivsi %arg1, %[[C16]]
+// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
+// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
+// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
+// CHECK: return %[[T1]]
+
+// -----
+
+#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
+#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
+#map2 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
+#map3 = affine_map<(d0, d1, d2) -> (d1, d2)>
+#encoding = #iree_encoding.encoding>
+util.func public @sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic(%arg0: index, %arg1: index) -> index {
+  %0 = stream.tensor.sizeof tensor{%arg0, %arg1} : index
+  util.return %0 : index
+}
+// CHECK-LABEL: @sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic
+// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
+// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
+// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivui %arg0, %[[C4]]
+// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]]
+// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C16]]
+// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
+// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
+// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
+// CHECK: return %[[T1]]
+
+// -----
+
+// The M-dimension inner tile is not present because the encoding broadcasts
+// across the M-dimension. We do not need to pack the M-dimension in this
+// case.
+#encoding_layout = #iree_cpu.vmvx_encoding_layout
+#encoding = #iree_encoding.encoding
+util.func public @sizeof_lhs_encoding_with_bcast_across_m_dim_dynamic_using_layouts(%arg0: index, %arg1: index) -> index {
+  %0 = stream.tensor.sizeof tensor{%arg0, %arg1} : index
+  util.return %0 : index
+}
+// CHECK-LABEL: @sizeof_lhs_encoding_with_bcast_across_m_dim_dynamic_using_layouts
+// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
+// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
+// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivsi %arg1, %[[C16]]
+// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
+//
+// Multiplied by 4 because f32 has 4 bytes.
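+//
+// As a hypothetical worked example (values not checked by this test): with
+// %arg0 = 10 and %arg1 = 20, the size is
+// 10 * 4 * (ceildiv(20, 16) * 16) = 10 * 4 * 32 = 1280 bytes.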
+//
+// CHECK: %[[T0:.+]] = arith.muli %arg0, %[[C4]]
+// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
+// CHECK: return %[[T1]]
+
+// -----
+
+#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
+#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
+#map2 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
+#map3 = affine_map<(d0, d1, d2) -> (d0, d2)>
+#encoding = #iree_encoding.encoding>
+util.func public @sizeof_lhs_encoding_with_bcast_across_m_dim_dynamic(%arg0: index, %arg1: index) -> index {
+  %0 = stream.tensor.sizeof tensor{%arg0, %arg1} : index
+  util.return %0 : index
+}
+// CHECK-LABEL: @sizeof_lhs_encoding_with_bcast_across_m_dim_dynamic
+// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
+// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
+// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C16]]
+// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
+//
+// Multiplied by 4 because f32 has 4 bytes.
+//
+// CHECK: %[[T0:.+]] = arith.muli %arg0, %[[C4]]
+// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
+// CHECK: return %[[T1]]
+
+// -----
+
+#encoding_layout_0 = #iree_cpu.cpu_encoding_layout
+#encoding_layout_1 = #iree_cpu.vmvx_encoding_layout
+#encoding = #iree_encoding.encoding
+util.func public @sizeof_multi_encoding_layouts(%arg0: index, %arg1: index) -> index {
+  %0 = stream.tensor.sizeof tensor{%arg0, %arg1} : index
+  util.return %0 : index
+}
+// CHECK-LABEL: @sizeof_multi_encoding_layouts
+// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
+// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
+// CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index
+// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
+//
+// Check for the first layout.
+//
+// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivsi %arg0, %[[C4]]
+// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]]
+// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivsi %arg1, %[[C8]]
+// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C8]]
+// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
+// CHECK: %[[SIZE0:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
+//
+// Check for the second layout.
+//
+// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivsi %arg0, %[[C2]]
+// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C2]]
+// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivsi %arg1, %[[C16]]
+// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
+// CHECK: %[[T1:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
+// CHECK: %[[SIZE1:.+]] = arith.muli %[[T1]], %[[PAD_D1]]
+//
+// Return the max value.
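+//
+// As a hypothetical worked example (values not checked by this test): with
+// %arg0 = %arg1 = 10, the first layout needs
+// (ceildiv(10, 4) * 4) * 4 * (ceildiv(10, 8) * 8) = 12 * 4 * 16 = 768 bytes
+// and the second needs
+// (ceildiv(10, 2) * 2) * 4 * (ceildiv(10, 16) * 16) = 10 * 4 * 16 = 640
+// bytes, so the buffer is sized for the larger of the two: 768 bytes.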
+// +// CHECK: %[[RES:.+]] = arith.maxui %[[SIZE0]], %[[SIZE1]] +// CHECK: return %[[RES]] diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_host_tensors_packing_i1_attr.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_host_tensors_packing_i1_attr.mlir deleted file mode 100644 index eefc9810aed5..000000000000 --- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_host_tensors_packing_i1_attr.mlir +++ /dev/null @@ -1,22 +0,0 @@ -// RUN: iree-opt --split-input-file --iree-stream-encode-host-tensors %s | FileCheck %s - -#packed = #iree_encoding.packed_storage -func.func @unaligned_i1_size() -> index { - %0 = stream.tensor.sizeof tensor<12xi1, #packed> : index - return %0 : index -} -// CHECK: func @unaligned_i1_size() -> index { -// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index -// CHECK: return %[[C2]] : index - -// ----- - -#packed = #iree_encoding.packed_storage -func.func @aligned_i1_size() -> index { - %0 = stream.tensor.sizeof tensor<24xi1, #packed> : index - return %0 : index -} - -// CHECK: func @aligned_i1_size() -> index { -// CHECK-DAG: %[[C3:.+]] = arith.constant 3 : index -// CHECK: return %[[C3]] : index diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_host_tensors_packing_i1_experimental_clopt.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_host_tensors_packing_i1_experimental_clopt.mlir index 12527ae139b1..c96e05270d12 100644 --- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_host_tensors_packing_i1_experimental_clopt.mlir +++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_host_tensors_packing_i1_experimental_clopt.mlir @@ -1,20 +1,23 @@ +// This is only used to test the experimental packing flag. When the default +// is changed the encode_host_tensors.mlir test should be updated and used +// instead and this file should be deleted. + // RUN: iree-opt --split-input-file --iree-stream-encode-host-tensors --iree-experimental-packed-i1-storage %s | FileCheck %s -func.func @unaligned_i1_size() -> index { +// CHECK-LABEL: @tensorSizeOfUnalignedPackedI1 +util.func @tensorSizeOfUnalignedPackedI1() -> index { + // CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index %0 = stream.tensor.sizeof tensor<12xi1> : index - return %0 : index + // CHECK: return %[[C2]] : index + util.return %0 : index } -// CHECK: func @unaligned_i1_size() -> index { -// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index -// CHECK: return %[[C2]] : index // ----- -func.func @aligned_i1_size() -> index { +// CHECK-LABEL: @tensorSizeOfAlignedPackedI1 +util.func @tensorSizeOfAlignedPackedI1() -> index { + // CHECK-DAG: %[[C3:.+]] = arith.constant 3 : index %0 = stream.tensor.sizeof tensor<24xi1> : index - return %0 : index + // CHECK: util.return %[[C3]] : index + util.return %0 : index } - -// CHECK: func @aligned_i1_size() -> index { -// CHECK-DAG: %[[C3:.+]] = arith.constant 3 : index -// CHECK: return %[[C3]] : index diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.cpp b/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.cpp index 410252bab913..c9e470fd443e 100644 --- a/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.cpp +++ b/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.cpp @@ -69,11 +69,8 @@ ArrayAttr deduplicateArrayElements(ArrayAttr arrayAttr) { return ArrayAttr::get(arrayAttr.getContext(), attrsSet.takeVector()); } -// Finds the operand index in |operands| that |tiedResult| references. 
-// Returns TiedOpInterface::kUntiedIndex if no operand is found. -static int64_t -findTiedOperand(OpAsmParser::UnresolvedOperand tiedResult, - ArrayRef operands) { +int64_t findTiedOperand(OpAsmParser::UnresolvedOperand tiedResult, + ArrayRef operands) { int64_t operandIndex = IREE::Util::TiedOpInterface::kUntiedIndex; for (int64_t i = 0; i < operands.size(); ++i) { if (operands[i].name == tiedResult.name && diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.h b/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.h index 1623b8e9a5bb..c0bdbce54a0c 100644 --- a/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.h +++ b/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.h @@ -48,6 +48,11 @@ Value buildIfElseTree( // Removes duplicate attributes in the array (if any). ArrayAttr deduplicateArrayElements(ArrayAttr arrayAttr); +// Finds the operand index in |operands| that |tiedResult| references. +// Returns TiedOpInterface::kUntiedIndex if no operand is found. +int64_t findTiedOperand(OpAsmParser::UnresolvedOperand tiedResult, + ArrayRef operands); + //===----------------------------------------------------------------------===// // custom($sym_visibility) //===----------------------------------------------------------------------===// diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/UtilTypes.td b/compiler/src/iree/compiler/Dialect/Util/IR/UtilTypes.td index ea1f222ad5d4..146e22c11c88 100644 --- a/compiler/src/iree/compiler/Dialect/Util/IR/UtilTypes.td +++ b/compiler/src/iree/compiler/Dialect/Util/IR/UtilTypes.td @@ -183,6 +183,26 @@ def Util_ObjectType : TypeDef { }]; } +//===----------------------------------------------------------------------===// +// !util.unused +//===----------------------------------------------------------------------===// + +def Util_UnusedType : TypeDef { + let mnemonic = "unused"; + + let summary = [{a placeholder for unused types}]; + let description = [{ + An unused type placeholder used to satisfy verifiers that may require a + type even if unused. + }]; + + let builders = [ + TypeBuilder<(ins), [{ + return $_get($_ctxt); + }]> + ]; +} + //===----------------------------------------------------------------------===// // !util.variant //===----------------------------------------------------------------------===//