From 7c410492bdcbfdd4972de827d995bfb8482841a5 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Wed, 19 Jun 2024 13:37:22 -0700 Subject: [PATCH] Fixing broken fill builtins that were double offsetting. (#17696) This had been broken since inception for any fill that was not starting at offset 0 of the target resource. By taking the offset as an argument subsequent dispatch operand packing would offset by the binding range. --- .../iree/compiler/Dialect/Stream/Builtins/fill_i16.mlir | 7 ++++--- .../iree/compiler/Dialect/Stream/Builtins/fill_i32.mlir | 7 ++++--- .../iree/compiler/Dialect/Stream/Builtins/fill_i64.mlir | 7 ++++--- .../src/iree/compiler/Dialect/Stream/Builtins/fill_i8.mlir | 7 ++++--- .../Dialect/Stream/Transforms/MaterializeBuiltins.cpp | 1 - .../Stream/Transforms/test/materialize_builtins.mlir | 2 +- 6 files changed, 17 insertions(+), 14 deletions(-) diff --git a/compiler/src/iree/compiler/Dialect/Stream/Builtins/fill_i16.mlir b/compiler/src/iree/compiler/Dialect/Stream/Builtins/fill_i16.mlir index dfbff7a9ead2..81f683f26cdf 100644 --- a/compiler/src/iree/compiler/Dialect/Stream/Builtins/fill_i16.mlir +++ b/compiler/src/iree/compiler/Dialect/Stream/Builtins/fill_i16.mlir @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // stream.builtin.fill.i16 -// Writes the i16 %value %count times at byte %offset of %out_binding. +// Writes the i16 %value %count times in the bound range of %out_binding. stream.executable private @__builtin_fill_i16 { stream.executable.export public @__builtin_fill_i16 workgroups(%arg0: index) -> (index, index, index) { @@ -13,8 +13,9 @@ stream.executable private @__builtin_fill_i16 { stream.return %x, %y, %z : index, index, index } builtin.module { - func.func @__builtin_fill_i16(%value: i16, %offset: index, %count: index, %out_binding: !stream.binding) { - %out = stream.binding.subspan %out_binding[%offset] : !stream.binding -> !flow.dispatch.tensor>{%count} + func.func @__builtin_fill_i16(%value: i16, %count: index, %out_binding: !stream.binding) { + %c0 = arith.constant 0 : index + %out = stream.binding.subspan %out_binding[%c0] : !stream.binding -> !flow.dispatch.tensor>{%count} %0 = tensor.empty(%count) : tensor %1 = linalg.fill ins(%value : i16) outs(%0 : tensor) -> tensor flow.dispatch.tensor.store %1, %out, offsets = [0], sizes = [%count], strides = [1] : tensor -> !flow.dispatch.tensor>{%count} diff --git a/compiler/src/iree/compiler/Dialect/Stream/Builtins/fill_i32.mlir b/compiler/src/iree/compiler/Dialect/Stream/Builtins/fill_i32.mlir index 8af753ae1ad0..43b0829e99e9 100644 --- a/compiler/src/iree/compiler/Dialect/Stream/Builtins/fill_i32.mlir +++ b/compiler/src/iree/compiler/Dialect/Stream/Builtins/fill_i32.mlir @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // stream.builtin.fill.i32 -// Writes the i32 %value %count times at byte %offset of %out_binding. +// Writes the i32 %value %count times in the bound range of %out_binding. stream.executable private @__builtin_fill_i32 { stream.executable.export public @__builtin_fill_i32 workgroups(%arg0: index) -> (index, index, index) { @@ -13,8 +13,9 @@ stream.executable private @__builtin_fill_i32 { stream.return %x, %y, %z : index, index, index } builtin.module { - func.func @__builtin_fill_i32(%value: i32, %offset: index, %count: index, %out_binding: !stream.binding) { - %out = stream.binding.subspan %out_binding[%offset] : !stream.binding -> !flow.dispatch.tensor>{%count} + func.func @__builtin_fill_i32(%value: i32, %count: index, %out_binding: !stream.binding) { + %c0 = arith.constant 0 : index + %out = stream.binding.subspan %out_binding[%c0] : !stream.binding -> !flow.dispatch.tensor>{%count} %0 = tensor.empty(%count) : tensor %1 = linalg.fill ins(%value : i32) outs(%0 : tensor) -> tensor flow.dispatch.tensor.store %1, %out, offsets = [0], sizes = [%count], strides = [1] : tensor -> !flow.dispatch.tensor>{%count} diff --git a/compiler/src/iree/compiler/Dialect/Stream/Builtins/fill_i64.mlir b/compiler/src/iree/compiler/Dialect/Stream/Builtins/fill_i64.mlir index 707667b0a361..96e527a20f0f 100644 --- a/compiler/src/iree/compiler/Dialect/Stream/Builtins/fill_i64.mlir +++ b/compiler/src/iree/compiler/Dialect/Stream/Builtins/fill_i64.mlir @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // stream.builtin.fill.i64 -// Writes the i64 %value %count times at byte %offset of %out_binding. +// Writes the i64 %value %count times in the bound range of %out_binding. stream.executable private @__builtin_fill_i64 { stream.executable.export public @__builtin_fill_i64 workgroups(%arg0: index) -> (index, index, index) { @@ -13,8 +13,9 @@ stream.executable private @__builtin_fill_i64 { stream.return %x, %y, %z : index, index, index } builtin.module { - func.func @__builtin_fill_i64(%value: i64, %offset: index, %count: index, %out_binding: !stream.binding) { - %out = stream.binding.subspan %out_binding[%offset] : !stream.binding -> !flow.dispatch.tensor>{%count} + func.func @__builtin_fill_i64(%value: i64, %count: index, %out_binding: !stream.binding) { + %c0 = arith.constant 0 : index + %out = stream.binding.subspan %out_binding[%c0] : !stream.binding -> !flow.dispatch.tensor>{%count} %0 = tensor.empty(%count) : tensor %1 = linalg.fill ins(%value : i64) outs(%0 : tensor) -> tensor flow.dispatch.tensor.store %1, %out, offsets = [0], sizes = [%count], strides = [1] : tensor -> !flow.dispatch.tensor>{%count} diff --git a/compiler/src/iree/compiler/Dialect/Stream/Builtins/fill_i8.mlir b/compiler/src/iree/compiler/Dialect/Stream/Builtins/fill_i8.mlir index 8b647ee153b7..7005ded9aee4 100644 --- a/compiler/src/iree/compiler/Dialect/Stream/Builtins/fill_i8.mlir +++ b/compiler/src/iree/compiler/Dialect/Stream/Builtins/fill_i8.mlir @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // stream.builtin.fill.i8 -// Writes the i8 %value %count times at byte %offset of %out_binding. +// Writes the i8 %value %count times in the bound range of %out_binding. stream.executable private @__builtin_fill_i8 { stream.executable.export public @__builtin_fill_i8 workgroups(%arg0: index) -> (index, index, index) { @@ -13,8 +13,9 @@ stream.executable private @__builtin_fill_i8 { stream.return %x, %y, %z : index, index, index } builtin.module { - func.func @__builtin_fill_i8(%value: i8, %offset: index, %count: index, %out_binding: !stream.binding) { - %out = stream.binding.subspan %out_binding[%offset] : !stream.binding -> !flow.dispatch.tensor>{%count} + func.func @__builtin_fill_i8(%value: i8, %count: index, %out_binding: !stream.binding) { + %c0 = arith.constant 0 : index + %out = stream.binding.subspan %out_binding[%c0] : !stream.binding -> !flow.dispatch.tensor>{%count} %0 = tensor.empty(%count) : tensor %1 = linalg.fill ins(%value : i8) outs(%0 : tensor) -> tensor flow.dispatch.tensor.store %1, %out, offsets = [0], sizes = [%count], strides = [1] : tensor -> !flow.dispatch.tensor>{%count} diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/MaterializeBuiltins.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/MaterializeBuiltins.cpp index c6d72ad9b024..7797ab179bd6 100644 --- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/MaterializeBuiltins.cpp +++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/MaterializeBuiltins.cpp @@ -285,7 +285,6 @@ static LogicalResult replaceBuiltinFillOp(IREE::Stream::AsyncFillOp fillOp, SmallVector operands = { fillOp.getTarget(), pattern, - fillOp.getTargetOffset(), elementCount, }; SmallVector operandSizes = { diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/materialize_builtins.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/materialize_builtins.mlir index bc50f4e1cdf1..5a8e9bc50790 100644 --- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/materialize_builtins.mlir +++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/materialize_builtins.mlir @@ -40,7 +40,7 @@ util.func public @builtinSplatI64(%arg0: index, %arg1: i64) -> !stream.resource< // CHECK-SAME: (%[[RES:.+]]: !stream.resource<*>, %[[SIZE:.+]]: index, %[[VALUE:.+]]: i64, %[[BYTE_OFFSET:.+]]: index, %[[BYTE_END:.+]]: index, %[[BYTE_LENGTH:.+]]: index) util.func public @builtinFillI64(%res: !stream.resource<*>, %size: index, %value: i64, %byte_offset: index, %byte_end: index, %byte_length: index) -> !stream.resource<*> { // CHECK: %[[COUNT:.+]] = arith.divui %[[BYTE_LENGTH]], %c8 - // CHECK: %[[RET:.+]] = stream.async.dispatch @__builtin_fill_i64::@__builtin_fill_i64[%[[COUNT]]](%[[RES]][%[[BYTE_OFFSET]] to %[[BYTE_END]] for %[[BYTE_LENGTH]]], %[[VALUE]], %[[BYTE_OFFSET]], %[[COUNT]]) : (!stream.resource<*>{%[[SIZE]]}, i64, index, index) -> %[[RES]]{%[[SIZE]]} + // CHECK: %[[RET:.+]] = stream.async.dispatch @__builtin_fill_i64::@__builtin_fill_i64[%[[COUNT]]](%[[RES]][%[[BYTE_OFFSET]] to %[[BYTE_END]] for %[[BYTE_LENGTH]]], %[[VALUE]], %[[COUNT]]) : (!stream.resource<*>{%[[SIZE]]}, i64, index) -> %[[RES]]{%[[SIZE]]} %0 = stream.async.fill %value, %res[%byte_offset to %byte_end for %byte_length] : i64 -> %arg0 as !stream.resource<*>{%size} // CHECK: util.return %[[RET]] util.return %0 : !stream.resource<*>