From 10ebaab9fcb796319df88dbf0db1a05d13a812c5 Mon Sep 17 00:00:00 2001 From: TFLM-bot Date: Tue, 7 Jan 2025 11:10:40 -0800 Subject: [PATCH 1/4] Automated sync from github.com/tensorflow/tensorflow (#3031) BUG=automated sync from upstream NO_CHECK_TFLITE_FILES=automated sync from upstream --- .../internal/reference/integer_ops/fully_connected.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h b/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h index 3a74402ed98..c6d06077934 100644 --- a/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h +++ b/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h @@ -42,12 +42,14 @@ void FullyConnectedPerChannel( const int32_t output_activation_min = params.quantized_activation_min; const int32_t output_activation_max = params.quantized_activation_max; TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2); + TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1); TFLITE_DCHECK_LE(output_activation_min, output_activation_max); const int filter_dim_count = filter_shape.DimensionsCount(); - const int batches = output_shape.Dims(0); - const int output_depth = output_shape.Dims(1); + + const int output_dim_count = output_shape.DimensionsCount(); + const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1); + const int output_depth = output_shape.Dims(output_dim_count - 1); TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2)); const int accum_depth = filter_shape.Dims(filter_dim_count - 1); for (int b = 0; b < batches; ++b) { From 9b79b9faf208fddb509a0efc671bf338b5675ab9 Mon Sep 17 00:00:00 2001 From: chase Date: Thu, 16 Jan 2025 13:56:18 -0800 Subject: [PATCH 2/4] Allow signed char for Cortex M cores (#3035) BUG=384562154 --- .../tools/make/targets/cortex_m_generic_makefile.inc | 8 +++++++- 1 file 
changed, 7 insertions(+), 1 deletion(-) diff --git a/tensorflow/lite/micro/tools/make/targets/cortex_m_generic_makefile.inc b/tensorflow/lite/micro/tools/make/targets/cortex_m_generic_makefile.inc index 8c43b4159f5..99d72c49463 100644 --- a/tensorflow/lite/micro/tools/make/targets/cortex_m_generic_makefile.inc +++ b/tensorflow/lite/micro/tools/make/targets/cortex_m_generic_makefile.inc @@ -28,6 +28,7 @@ endif FLOAT := soft GCC_TARGET_ARCH := $(TARGET_ARCH) +SIGNED_CHAR := false # Explicitly set this to true to include the kissfft symbols. INCLUDE_MICRO_SPEECH := false @@ -174,7 +175,6 @@ PLATFORM_FLAGS = \ -DTF_LITE_MCU_DEBUG_LOG \ -mthumb \ -mfloat-abi=$(FLOAT) \ - -funsigned-char \ -mlittle-endian \ -Wno-type-limits \ -Wno-unused-private-field \ @@ -182,6 +182,12 @@ PLATFORM_FLAGS = \ -MD \ -DCPU_$(CORE)=1 +ifeq ($(SIGNED_CHAR), false) + PLATFORM_FLAGS += -funsigned-char +else + PLATFORM_FLAGS += -fsigned-char +endif + # For DWT/PMU counters. Header file name is depending on target architecture. 
PLATFORM_FLAGS += -DCMSIS_DEVICE_ARM_CORTEX_M_XX_HEADER_FILE=\"$(ARM_CPU).h\" PLATFORM_FLAGS += -D$(ARM_CPU) From 740cef3881cdd1f3954be97c2982c2cfcb3f81e0 Mon Sep 17 00:00:00 2001 From: Ryan OShea <86965113+ArmRyan@users.noreply.github.com> Date: Wed, 5 Feb 2025 02:33:16 +0100 Subject: [PATCH 3/4] CMSIS-NN Min Max int8 support (#2753) * Moves common functions to new maximum_minimum.h * Creates cmsis-nn/maximum_minimum.cc BUG=#2752 Change-Id: Ifbb3fedf53043b2f8d4c48d73c2ca44c7f0f87ca --- tensorflow/lite/micro/kernels/BUILD | 1 + .../micro/kernels/cmsis_nn/maximum_minimum.cc | 247 ++++++++++++++++++ .../lite/micro/kernels/maximum_minimum.cc | 50 +--- .../lite/micro/kernels/maximum_minimum.h | 105 ++++++++ .../lite/micro/micro_mutable_op_resolver.h | 22 +- 5 files changed, 367 insertions(+), 58 deletions(-) create mode 100644 tensorflow/lite/micro/kernels/cmsis_nn/maximum_minimum.cc create mode 100644 tensorflow/lite/micro/kernels/maximum_minimum.h diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index 1615d774907..8562d8bb53f 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -333,6 +333,7 @@ tflm_kernel_cc_library( "logistic.h", "lstm_eval.h", "lstm_shared.h", + "maximum_minimum.h", "micro_ops.h", "mul.h", "pad.h", diff --git a/tensorflow/lite/micro/kernels/cmsis_nn/maximum_minimum.cc b/tensorflow/lite/micro/kernels/cmsis_nn/maximum_minimum.cc new file mode 100644 index 00000000000..a6affaa11bb --- /dev/null +++ b/tensorflow/lite/micro/kernels/cmsis_nn/maximum_minimum.cc @@ -0,0 +1,247 @@ +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/micro/kernels/maximum_minimum.h" + +#include "Include/arm_nnfunctions.h" +#include "tensorflow/lite/c/builtin_op_data.h" +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/common.h" +#include "tensorflow/lite/kernels/internal/quantization_util.h" +#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/kernels/op_macros.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" +#include "tensorflow/lite/micro/micro_log.h" + +namespace tflite { + +namespace { + +cmsis_nn_dims FillVariableShape(int32_t rank, int32_t* tensor_dims) { + if (rank == 4) { + return {tensor_dims[0], tensor_dims[1], tensor_dims[2], tensor_dims[3]}; + } else if (rank == 3) { + return {1, tensor_dims[0], tensor_dims[1], tensor_dims[2]}; + } else if (rank == 2) { + return {1, 1, tensor_dims[0], tensor_dims[1]}; + } else { + return {1, 1, 1, 1}; + } +} + +TfLiteStatus EvalMaximum(TfLiteContext* context, TfLiteNode* node) { + OpContext op_context(context, node); + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + RuntimeShape input_1_shape = tflite::micro::GetTensorShape(input1); + RuntimeShape input_2_shape = tflite::micro::GetTensorShape(input2); + 
RuntimeShape output_shape = tflite::micro::GetTensorShape(output); + + cmsis_nn_dims input_1_dims = FillVariableShape( + input_1_shape.DimensionsCount(), input_1_shape.DimsData()); + cmsis_nn_dims input_2_dims = FillVariableShape( + input_2_shape.DimensionsCount(), input_2_shape.DimsData()); + cmsis_nn_dims output_dims = FillVariableShape(output_shape.DimensionsCount(), + output_shape.DimsData()); + + switch (op_context.output->type) { + case kTfLiteInt8: + cmsis_nn_context ctx; + ctx.buf = nullptr; + ctx.size = 0; + + arm_maximum_s8( + &ctx, tflite::micro::GetTensorData(input1), &input_1_dims, + tflite::micro::GetTensorData(input2), &input_2_dims, + tflite::micro::GetTensorData(output), &output_dims); + break; + case kTfLiteFloat32: + TFLiteOperation(context, node, op_context); + break; + case kTfLiteInt16: + TFLiteOperation(context, node, op_context); + break; + case kTfLiteInt32: + TFLiteOperation(context, node, op_context); + break; + case kTfLiteInt64: + TFLiteOperation(context, node, op_context); + break; + default: + MicroPrintf("Type %s (%d) is not supported by Maximum/Minimum.", + TfLiteTypeGetName(op_context.output->type), + op_context.output->type); + return kTfLiteError; + } + return kTfLiteOk; +} + +TfLiteStatus EvalMaximumInt8(TfLiteContext* context, TfLiteNode* node) { + OpContext op_context(context, node); + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + RuntimeShape input_1_shape = tflite::micro::GetTensorShape(input1); + RuntimeShape input_2_shape = tflite::micro::GetTensorShape(input2); + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); + + cmsis_nn_dims input_1_dims = FillVariableShape( + input_1_shape.DimensionsCount(), input_1_shape.DimsData()); + cmsis_nn_dims input_2_dims = 
FillVariableShape( + input_2_shape.DimensionsCount(), input_2_shape.DimsData()); + cmsis_nn_dims output_dims = FillVariableShape(output_shape.DimensionsCount(), + output_shape.DimsData()); + + switch (op_context.output->type) { + case kTfLiteInt8: + cmsis_nn_context ctx; + ctx.buf = nullptr; + ctx.size = 0; + + arm_maximum_s8( + &ctx, tflite::micro::GetTensorData(input1), &input_1_dims, + tflite::micro::GetTensorData(input2), &input_2_dims, + tflite::micro::GetTensorData(output), &output_dims); + break; + default: + MicroPrintf("Type %s (%d) is not supported by Maximum Int8 Registration.", + TfLiteTypeGetName(op_context.output->type), + op_context.output->type); + return kTfLiteError; + } + return kTfLiteOk; +} + +TfLiteStatus EvalMinimum(TfLiteContext* context, TfLiteNode* node) { + OpContext op_context(context, node); + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + RuntimeShape input_1_shape = tflite::micro::GetTensorShape(input1); + RuntimeShape input_2_shape = tflite::micro::GetTensorShape(input2); + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); + + cmsis_nn_dims input_1_dims = FillVariableShape( + input_1_shape.DimensionsCount(), input_1_shape.DimsData()); + cmsis_nn_dims input_2_dims = FillVariableShape( + input_2_shape.DimensionsCount(), input_2_shape.DimsData()); + cmsis_nn_dims output_dims = FillVariableShape(output_shape.DimensionsCount(), + output_shape.DimsData()); + + switch (op_context.output->type) { + case kTfLiteInt8: + cmsis_nn_context ctx; + ctx.buf = nullptr; + ctx.size = 0; + + arm_minimum_s8( + &ctx, tflite::micro::GetTensorData(input1), &input_1_dims, + tflite::micro::GetTensorData(input2), &input_2_dims, + tflite::micro::GetTensorData(output), &output_dims); + break; + case 
kTfLiteFloat32: + TFLiteOperation(context, node, op_context); + break; + case kTfLiteInt16: + TFLiteOperation(context, node, op_context); + break; + case kTfLiteInt32: + TFLiteOperation(context, node, op_context); + break; + case kTfLiteInt64: + TFLiteOperation(context, node, op_context); + break; + default: + MicroPrintf("Type %s (%d) is not supported by Maximum/Minimum.", + TfLiteTypeGetName(op_context.output->type), + op_context.output->type); + return kTfLiteError; + } + return kTfLiteOk; +} + +TfLiteStatus EvalMinimumInt8(TfLiteContext* context, TfLiteNode* node) { + OpContext op_context(context, node); + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + RuntimeShape input_1_shape = tflite::micro::GetTensorShape(input1); + RuntimeShape input_2_shape = tflite::micro::GetTensorShape(input2); + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); + + cmsis_nn_dims input_1_dims = FillVariableShape( + input_1_shape.DimensionsCount(), input_1_shape.DimsData()); + cmsis_nn_dims input_2_dims = FillVariableShape( + input_2_shape.DimensionsCount(), input_2_shape.DimsData()); + cmsis_nn_dims output_dims = FillVariableShape(output_shape.DimensionsCount(), + output_shape.DimsData()); + + switch (op_context.output->type) { + case kTfLiteInt8: + cmsis_nn_context ctx; + ctx.buf = nullptr; + ctx.size = 0; + + arm_minimum_s8( + &ctx, tflite::micro::GetTensorData(input1), &input_1_dims, + tflite::micro::GetTensorData(input2), &input_2_dims, + tflite::micro::GetTensorData(output), &output_dims); + break; + default: + MicroPrintf("Type %s (%d) is not supported by Minimum Int8 registration.", + TfLiteTypeGetName(op_context.output->type), + op_context.output->type); + return kTfLiteError; + } + return kTfLiteOk; +} + +} // namespace + 
+TFLMRegistration Register_MAXIMUM() { + return tflite::micro::RegisterOp(nullptr, nullptr, EvalMaximum); +} + +TFLMRegistration Register_MINIMUM() { + return tflite::micro::RegisterOp(nullptr, nullptr, EvalMinimum); +} + +TFLMRegistration Register_MAXIMUM_INT8() { + return tflite::micro::RegisterOp(nullptr, nullptr, EvalMaximumInt8); +} + +TFLMRegistration Register_MINIMUM_INT8() { + return tflite::micro::RegisterOp(nullptr, nullptr, EvalMinimumInt8); +} + +} // namespace tflite diff --git a/tensorflow/lite/micro/kernels/maximum_minimum.cc b/tensorflow/lite/micro/kernels/maximum_minimum.cc index 4dc87b40148..ef4a0a6a522 100644 --- a/tensorflow/lite/micro/kernels/maximum_minimum.cc +++ b/tensorflow/lite/micro/kernels/maximum_minimum.cc @@ -1,4 +1,4 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -23,59 +23,13 @@ limitations under the License. #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/op_macros.h" #include "tensorflow/lite/micro/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/maximum_minimum.h" #include "tensorflow/lite/micro/micro_log.h" namespace tflite { namespace { -// This file has a reference implementation of TFMaximum/TFMinimum. 
-enum KernelType { - kReference, -}; - -constexpr int kInputTensor1 = 0; -constexpr int kInputTensor2 = 1; -constexpr int kOutputTensor = 0; - -struct OpContext { - OpContext(TfLiteContext* context, TfLiteNode* node) { - input1 = tflite::micro::GetEvalInput(context, node, kInputTensor1); - input2 = tflite::micro::GetEvalInput(context, node, kInputTensor2); - output = tflite::micro::GetEvalOutput(context, node, kOutputTensor); - } - const TfLiteEvalTensor* input1; - const TfLiteEvalTensor* input2; - TfLiteEvalTensor* output; -}; - -struct MaximumOp { - template - static data_type op(data_type el1, data_type el2) { - return el1 > el2 ? el1 : el2; - } -}; - -struct MinimumOp { - template - static data_type op(data_type el1, data_type el2) { - return el1 < el2 ? el1 : el2; - } -}; - -template -void TFLiteOperation(TfLiteContext* context, TfLiteNode* node, - const OpContext& op_context) { - reference_ops::MaximumMinimumBroadcastSlow( - tflite::micro::GetTensorShape(op_context.input1), - tflite::micro::GetTensorData(op_context.input1), - tflite::micro::GetTensorShape(op_context.input2), - tflite::micro::GetTensorData(op_context.input2), - tflite::micro::GetTensorShape(op_context.output), - tflite::micro::GetTensorData(op_context.output), - op_type::template op); -} - template TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { OpContext op_context(context, node); diff --git a/tensorflow/lite/micro/kernels/maximum_minimum.h b/tensorflow/lite/micro/kernels/maximum_minimum.h new file mode 100644 index 00000000000..34d7e2399f3 --- /dev/null +++ b/tensorflow/lite/micro/kernels/maximum_minimum.h @@ -0,0 +1,105 @@ +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_MAXIMUM_MINIMUM_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_MAXIMUM_MINIMUM_H_ + +#include "tensorflow/lite/c/builtin_op_data.h" +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/common.h" +#include "tensorflow/lite/kernels/internal/quantization_util.h" +#include "tensorflow/lite/kernels/internal/reference/maximum_minimum.h" +#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/kernels/op_macros.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" +#include "tensorflow/lite/micro/micro_log.h" + +namespace tflite { + +// This file has a reference implementation of TFMaximum/TFMinimum. +enum KernelType { + kReference, +}; + +constexpr int kInputTensor1 = 0; +constexpr int kInputTensor2 = 1; +constexpr int kOutputTensor = 0; + +struct OpContext { + OpContext(TfLiteContext* context, TfLiteNode* node) { + input1 = tflite::micro::GetEvalInput(context, node, kInputTensor1); + input2 = tflite::micro::GetEvalInput(context, node, kInputTensor2); + output = tflite::micro::GetEvalOutput(context, node, kOutputTensor); + } + const TfLiteEvalTensor* input1; + const TfLiteEvalTensor* input2; + TfLiteEvalTensor* output; +}; + +struct MaximumOp { + template + static data_type op(data_type el1, data_type el2) { + return el1 > el2 ? 
el1 : el2; + } +}; + +struct MinimumOp { + template + static data_type op(data_type el1, data_type el2) { + return el1 < el2 ? el1 : el2; + } +}; + +template +void TFLiteOperation(TfLiteContext* context, TfLiteNode* node, + const OpContext& op_context) { + reference_ops::MaximumMinimumBroadcastSlow( + tflite::micro::GetTensorShape(op_context.input1), + tflite::micro::GetTensorData(op_context.input1), + tflite::micro::GetTensorShape(op_context.input2), + tflite::micro::GetTensorData(op_context.input2), + tflite::micro::GetTensorShape(op_context.output), + tflite::micro::GetTensorData(op_context.output), + op_type::template op); +} + +TFLMRegistration Register_MAXIMUM(); + +TFLMRegistration Register_MINIMUM(); + +#if defined(CMSIS_NN) +// Returns a TFLMRegistration struct for kernel variant that only supports +// int8. +TFLMRegistration Register_MAXIMUM_INT8(); + +// Returns a TFLMRegistration struct for kernel variant that only supports +// int8. +TFLMRegistration Register_MINIMUM_INT8(); + +#else +// Note that while this block gets used for both reference and optimized kernels +// that do not have any specialized implementations, the only goal here is to +// define fallback implementation that allow reference kernels to still be used +// from applications that call a more specific kernel variant. +inline TFLMRegistration Register_MAXIMUM_INT8() { return Register_MAXIMUM(); } + +inline TFLMRegistration Register_MINIMUM_INT8() { return Register_MINIMUM(); } + +#endif + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_MAXIMUM_MINIMUM_H_ diff --git a/tensorflow/lite/micro/micro_mutable_op_resolver.h b/tensorflow/lite/micro/micro_mutable_op_resolver.h index f5f6e38e003..ad642ddbc06 100644 --- a/tensorflow/lite/micro/micro_mutable_op_resolver.h +++ b/tensorflow/lite/micro/micro_mutable_op_resolver.h @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -28,6 +28,7 @@ limitations under the License. #include "tensorflow/lite/micro/kernels/depthwise_conv.h" #include "tensorflow/lite/micro/kernels/ethosu.h" #include "tensorflow/lite/micro/kernels/fully_connected.h" +#include "tensorflow/lite/micro/kernels/maximum_minimum.h" #include "tensorflow/lite/micro/kernels/micro_ops.h" #include "tensorflow/lite/micro/kernels/mul.h" #include "tensorflow/lite/micro/kernels/pooling.h" @@ -414,9 +415,9 @@ class MicroMutableOpResolver : public MicroOpResolver { tflite::Register_LOG_SOFTMAX(), ParseLogSoftmax); } - TfLiteStatus AddMaximum() { - return AddBuiltin(BuiltinOperator_MAXIMUM, Register_MAXIMUM(), - ParseMaximum); + TfLiteStatus AddMaximum( + const TFLMRegistration& registration = Register_MAXIMUM()) { + return AddBuiltin(BuiltinOperator_MAXIMUM, registration, ParseMaximum); } TfLiteStatus AddMaxPool2D( @@ -433,9 +434,9 @@ class MicroMutableOpResolver : public MicroOpResolver { return AddBuiltin(BuiltinOperator_MEAN, Register_MEAN(), ParseReducer); } - TfLiteStatus AddMinimum() { - return AddBuiltin(BuiltinOperator_MINIMUM, Register_MINIMUM(), - ParseMinimum); + TfLiteStatus AddMinimum( + const TFLMRegistration& registration = Register_MINIMUM()) { + return AddBuiltin(BuiltinOperator_MINIMUM, registration, ParseMinimum); } TfLiteStatus AddMul(const TFLMRegistration& registration = Register_MUL()) { @@ -452,7 +453,8 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddOverlapAdd() { - // TODO(b/286250473): change back name to "OverlapAdd" and remove namespace + // TODO(b/286250473): change back name to "OverlapAdd" and remove + // namespace return AddCustom("SignalOverlapAdd", tflite::tflm_signal::Register_OVERLAP_ADD()); } @@ -684,8 +686,8 @@ class MicroMutableOpResolver : public MicroOpResolver { } registrations_[registrations_len_] = registration; - // Strictly 
speaking, the builtin_code is not necessary for TFLM but filling - // it in regardless. + // Strictly speaking, the builtin_code is not necessary for TFLM but + // filling it in regardless. registrations_[registrations_len_].builtin_code = op; registrations_len_++; From ef64591270691022a329cf04ba9e73ecfb15ddb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A5ns=20Nilsson?= Date: Fri, 7 Feb 2025 04:02:49 +0100 Subject: [PATCH 4/4] Fix quant specific op registration for some ops (#2770) BUG=Quantization specific registration for BatchMatmul, SVDF and LSTM were not working correctly. --- tensorflow/lite/micro/kernels/BUILD | 2 + tensorflow/lite/micro/kernels/batch_matmul.cc | 2 + tensorflow/lite/micro/kernels/batch_matmul.h | 97 ++------------ .../lite/micro/kernels/batch_matmul_common.cc | 119 ++++++++++++++++++ .../lite/micro/micro_mutable_op_resolver.h | 10 +- tensorflow/lite/micro/tools/make/Makefile | 1 + 6 files changed, 140 insertions(+), 91 deletions(-) create mode 100644 tensorflow/lite/micro/kernels/batch_matmul_common.cc diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index 8562d8bb53f..d0bf2cbc0a2 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -222,6 +222,7 @@ tflm_kernel_cc_library( "arg_min_max.cc", "assign_variable.cc", "batch_matmul.cc", + "batch_matmul_common.cc", "batch_to_space_nd.cc", "broadcast_args.cc", "broadcast_to.cc", @@ -347,6 +348,7 @@ tflm_kernel_cc_library( "sub.h", "svdf.h", "transpose_conv.h", + "unidirectional_sequence_lstm.h", ] + select({ xtensa_fusion_f1_config(): glob(["xtensa/**/*.h"]), xtensa_hifi_3_config(): glob(["xtensa/**/*.h"]), diff --git a/tensorflow/lite/micro/kernels/batch_matmul.cc b/tensorflow/lite/micro/kernels/batch_matmul.cc index 15112e3b4cd..bbb1c0b0a7e 100644 --- a/tensorflow/lite/micro/kernels/batch_matmul.cc +++ b/tensorflow/lite/micro/kernels/batch_matmul.cc @@ -24,7 +24,9 @@ limitations under the License. 
#include "tensorflow/lite/kernels/internal/reference/transpose.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/internal/types.h" +#include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/micro/kernels/batch_matmul.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" #include "tensorflow/lite/micro/micro_log.h" namespace tflite { diff --git a/tensorflow/lite/micro/kernels/batch_matmul.h b/tensorflow/lite/micro/kernels/batch_matmul.h index 198b1d48ead..5e811fa3782 100644 --- a/tensorflow/lite/micro/kernels/batch_matmul.h +++ b/tensorflow/lite/micro/kernels/batch_matmul.h @@ -16,22 +16,12 @@ limitations under the License. #ifndef TENSORFLOW_LITE_MICRO_KERNELS_BATCH_MATMUL_H_ #define TENSORFLOW_LITE_MICRO_KERNELS_BATCH_MATMUL_H_ -#include - #include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/kernels/internal/reference/transpose.h" #include "tensorflow/lite/kernels/internal/types.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" #include "tensorflow/lite/micro/micro_common.h" -#include "tensorflow/lite/micro/micro_log.h" namespace tflite { -extern constexpr int kBatchMatmulInputLhsTensor = 0; -extern constexpr int kBatchMatmulInputRhsTensor = 1; -extern constexpr int kBatchMatmulOutputTensor = 0; - struct QuantizationOpDataBatchMatmul { // The scaling factor from input to output (aka the 'real multiplier') can // be represented as a fixed point multiplier plus a left shift. 
@@ -59,98 +49,29 @@ struct OpDataBatchMatmul { bool rhs_is_constant_tensor; }; +extern const int kBatchMatmulInputLhsTensor; +extern const int kBatchMatmulInputRhsTensor; +extern const int kBatchMatmulOutputTensor; + TfLiteStatus ReshapeOutputTensor(TfLiteContext* context, TfLiteNode* node, const RuntimeShape& extended_lhs_shape, const RuntimeShape& extended_rhs_shape, bool adj_x, bool adj_y, int output_rank, - TfLiteTensor* output) { - int64_t orig_size = NumElements(output); - - // make sure the new output dims rank does not exceed the original rank - TF_LITE_ENSURE(context, output_rank <= NumDimensions(output)); - - // make sure output tensor dims are not in the FlatBuffer - TfLiteEvalTensor* output_eval = - tflite::micro::GetEvalOutput(context, node, kBatchMatmulOutputTensor); - TF_LITE_ENSURE_OK(context, tflite::micro::CreateWritableTensorDimsWithCopy( - context, output, output_eval)); - - // Fill in any broadcast dimensions. - for (int i = 0; i < output_rank - 2; ++i) { - const int lhs_dim = extended_lhs_shape.Dims(i); - const int rhs_dim = extended_rhs_shape.Dims(i); - int broadcast_dim = lhs_dim; - if ((lhs_dim != rhs_dim) && (lhs_dim == 1)) { - broadcast_dim = rhs_dim; - } - output->dims->data[i] = broadcast_dim; - } - // Fill in the matmul dimensions. - int lhs_rows_index = adj_x ? output_rank - 1 : output_rank - 2; - int rhs_cols_index = adj_y ? output_rank - 2 : output_rank - 1; - - output->dims->data[output_rank - 2] = extended_lhs_shape.Dims(lhs_rows_index); - output->dims->data[output_rank - 1] = extended_rhs_shape.Dims(rhs_cols_index); - output->dims->size = output_rank; - - // Check that output tensor has not been resized - // since TFLM doesn't support tensor resizing. 
- TF_LITE_ENSURE_EQ(context, orig_size, NumElements(output)); - - return kTfLiteOk; -} + TfLiteTensor* output); template void TransposeRowsColumnsImpl(const TfLiteEvalTensor& tensor_in, - TfLiteEvalTensor* tensor_out) { - const T* input = tflite::micro::GetTensorData(&tensor_in); - T* output = tflite::micro::GetTensorData(tensor_out); - RuntimeShape transposed_shape(tflite::micro::GetTensorShape(&tensor_in)); - RuntimeShape shape(transposed_shape); - TransposeParams params; - const int rank = shape.DimensionsCount(); - params.perm_count = rank; - for (int i = 0; i < rank - 2; ++i) { - params.perm[i] = i; - } - // Transpose the last two dimensions. - params.perm[rank - 2] = rank - 1; - params.perm[rank - 1] = rank - 2; - transposed_shape.SetDim(rank - 1, shape.Dims(rank - 2)); - transposed_shape.SetDim(rank - 2, shape.Dims(rank - 1)); - reference_ops::Transpose(params, shape, input, transposed_shape, output); -} + TfLiteEvalTensor* tensor_out); TfLiteStatus TransposeRowsColumns(const TfLiteEvalTensor& tensor_in, - TfLiteEvalTensor* tensor_out) { - if (tensor_in.type == kTfLiteFloat32) { - TransposeRowsColumnsImpl(tensor_in, tensor_out); - return kTfLiteOk; - } else if (tensor_in.type == kTfLiteInt8) { - TransposeRowsColumnsImpl(tensor_in, tensor_out); - return kTfLiteOk; - } else if (tensor_in.type == kTfLiteInt16) { - TransposeRowsColumnsImpl(tensor_in, tensor_out); - return kTfLiteOk; - } else { - MicroPrintf( - "BATCH_MATMUL can only transpose tensors with FLOAT32, INT8, INT16 " - "type."); - } - return kTfLiteError; -} + TfLiteEvalTensor* tensor_out); -RuntimeShape SwapRowColumnDims(const RuntimeShape& shape) { - RuntimeShape swapped_shape(shape); - const int32_t dims = shape.DimensionsCount(); - swapped_shape.SetDim(dims - 2, shape.Dims(dims - 1)); - swapped_shape.SetDim(dims - 1, shape.Dims(dims - 2)); - return swapped_shape; -} +RuntimeShape SwapRowColumnDims(const RuntimeShape& shape); TFLMRegistration Register_BATCH_MATMUL(); #if defined(CMSIS_NN) + // 
Returns a TFLMRegistration struct for kernel variant that only supports // int8 matrix multiplication and uses the latency optimized // implementations. diff --git a/tensorflow/lite/micro/kernels/batch_matmul_common.cc b/tensorflow/lite/micro/kernels/batch_matmul_common.cc new file mode 100644 index 00000000000..1447cd489e9 --- /dev/null +++ b/tensorflow/lite/micro/kernels/batch_matmul_common.cc @@ -0,0 +1,119 @@ +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/
+
+#include <cstdint>
+
+#include "tensorflow/lite/kernels/internal/reference/transpose.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/batch_matmul.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/micro_log.h"
+
+namespace tflite {
+
+const int kBatchMatmulInputLhsTensor = 0;
+const int kBatchMatmulInputRhsTensor = 1;
+const int kBatchMatmulOutputTensor = 0;
+
+TfLiteStatus ReshapeOutputTensor(TfLiteContext* context, TfLiteNode* node,
+                                 const RuntimeShape& extended_lhs_shape,
+                                 const RuntimeShape& extended_rhs_shape,
+                                 bool adj_x, bool adj_y, int output_rank,
+                                 TfLiteTensor* output) {
+  int64_t orig_size = NumElements(output);
+
+  // make sure the new output dims rank does not exceed the original rank
+  TF_LITE_ENSURE(context, output_rank <= NumDimensions(output));
+
+  // make sure output tensor dims are not in the FlatBuffer
+  TfLiteEvalTensor* output_eval =
+      tflite::micro::GetEvalOutput(context, node, kBatchMatmulOutputTensor);
+  TF_LITE_ENSURE_OK(context, tflite::micro::CreateWritableTensorDimsWithCopy(
+                                 context, output, output_eval));
+
+  // Fill in any broadcast dimensions.
+  for (int i = 0; i < output_rank - 2; ++i) {
+    const int lhs_dim = extended_lhs_shape.Dims(i);
+    const int rhs_dim = extended_rhs_shape.Dims(i);
+    int broadcast_dim = lhs_dim;
+    if ((lhs_dim != rhs_dim) && (lhs_dim == 1)) {
+      broadcast_dim = rhs_dim;
+    }
+    output->dims->data[i] = broadcast_dim;
+  }
+  // Fill in the matmul dimensions.
+  int lhs_rows_index = adj_x ? output_rank - 1 : output_rank - 2;
+  int rhs_cols_index = adj_y ? output_rank - 2 : output_rank - 1;
+
+  output->dims->data[output_rank - 2] = extended_lhs_shape.Dims(lhs_rows_index);
+  output->dims->data[output_rank - 1] = extended_rhs_shape.Dims(rhs_cols_index);
+  output->dims->size = output_rank;
+
+  // Check that output tensor has not been resized
+  // since TFLM doesn't support tensor resizing.
+  TF_LITE_ENSURE_EQ(context, orig_size, NumElements(output));
+
+  return kTfLiteOk;
+}
+
+template <typename T>
+void TransposeRowsColumnsImpl(const TfLiteEvalTensor& tensor_in,
+                              TfLiteEvalTensor* tensor_out) {
+  const T* input = tflite::micro::GetTensorData<T>(&tensor_in);
+  T* output = tflite::micro::GetTensorData<T>(tensor_out);
+  RuntimeShape transposed_shape(tflite::micro::GetTensorShape(&tensor_in));
+  RuntimeShape shape(transposed_shape);
+  TransposeParams params;
+  const int rank = shape.DimensionsCount();
+  params.perm_count = rank;
+  for (int i = 0; i < rank - 2; ++i) {
+    params.perm[i] = i;
+  }
+  // Transpose the last two dimensions.
+  params.perm[rank - 2] = rank - 1;
+  params.perm[rank - 1] = rank - 2;
+  transposed_shape.SetDim(rank - 1, shape.Dims(rank - 2));
+  transposed_shape.SetDim(rank - 2, shape.Dims(rank - 1));
+  reference_ops::Transpose(params, shape, input, transposed_shape, output);
+}
+
+TfLiteStatus TransposeRowsColumns(const TfLiteEvalTensor& tensor_in,
+                                  TfLiteEvalTensor* tensor_out) {
+  if (tensor_in.type == kTfLiteFloat32) {
+    TransposeRowsColumnsImpl<float>(tensor_in, tensor_out);
+    return kTfLiteOk;
+  } else if (tensor_in.type == kTfLiteInt8) {
+    TransposeRowsColumnsImpl<int8_t>(tensor_in, tensor_out);
+    return kTfLiteOk;
+  } else if (tensor_in.type == kTfLiteInt16) {
+    TransposeRowsColumnsImpl<int16_t>(tensor_in, tensor_out);
+    return kTfLiteOk;
+  } else {
+    MicroPrintf(
+        "BATCH_MATMUL can only transpose tensors with FLOAT32, INT8, INT16 "
+        "type.");
+  }
+  return kTfLiteError;
+}
+
+RuntimeShape SwapRowColumnDims(const RuntimeShape& shape) {
+  RuntimeShape swapped_shape(shape);
+  const int32_t dims = shape.DimensionsCount();
+  swapped_shape.SetDim(dims - 2, shape.Dims(dims - 1));
+  swapped_shape.SetDim(dims - 1, shape.Dims(dims - 2));
+  return swapped_shape;
+}
+
+}  // namespace tflite
diff --git a/tensorflow/lite/micro/micro_mutable_op_resolver.h b/tensorflow/lite/micro/micro_mutable_op_resolver.h
index ad642ddbc06..f3f2080f0aa 100644
--- a/tensorflow/lite/micro/micro_mutable_op_resolver.h
+++ b/tensorflow/lite/micro/micro_mutable_op_resolver.h
@@ -24,6 +24,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/op_macros.h"
 #include "tensorflow/lite/micro/compatibility.h"
 #include "tensorflow/lite/micro/kernels/add.h"
+#include "tensorflow/lite/micro/kernels/batch_matmul.h"
 #include "tensorflow/lite/micro/kernels/conv.h"
 #include "tensorflow/lite/micro/kernels/depthwise_conv.h"
 #include "tensorflow/lite/micro/kernels/ethosu.h"
@@ -34,7 +35,9 @@ limitations under the License.
 #include "tensorflow/lite/micro/kernels/pooling.h"
 #include "tensorflow/lite/micro/kernels/reduce.h"
 #include "tensorflow/lite/micro/kernels/softmax.h"
+#include "tensorflow/lite/micro/kernels/svdf.h"
 #include "tensorflow/lite/micro/kernels/transpose_conv.h"
+#include "tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.h"
 #include "tensorflow/lite/micro/micro_log.h"
 #include "tensorflow/lite/micro/micro_op_resolver.h"
 #include "tensorflow/lite/schema/schema_generated.h"
@@ -146,9 +149,10 @@ class MicroMutableOpResolver : public MicroOpResolver {
     return AddBuiltin(BuiltinOperator_AVERAGE_POOL_2D, registration, ParsePool);
   }
 
-  TfLiteStatus AddBatchMatMul() {
-    return AddBuiltin(BuiltinOperator_BATCH_MATMUL,
-                      tflite::Register_BATCH_MATMUL(), ParseBatchMatMul);
+  TfLiteStatus AddBatchMatMul(
+      const TFLMRegistration& registration = Register_BATCH_MATMUL()) {
+    return AddBuiltin(BuiltinOperator_BATCH_MATMUL, registration,
+                      ParseBatchMatMul);
   }
 
   TfLiteStatus AddBatchToSpaceNd() {
diff --git a/tensorflow/lite/micro/tools/make/Makefile b/tensorflow/lite/micro/tools/make/Makefile
index e6912e91705..1b4f9d4bf2c 100644
--- a/tensorflow/lite/micro/tools/make/Makefile
+++ b/tensorflow/lite/micro/tools/make/Makefile
@@ -365,6 +365,7 @@ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/add_n.cc \
 $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/arg_min_max.cc \
 $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/assign_variable.cc \
 $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/batch_matmul.cc \
+$(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/batch_matmul_common.cc \
 $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/batch_to_space_nd.cc \
 $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/broadcast_args.cc \
 $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/broadcast_to.cc \