From fed8118834ffebe7168f6414d0a045a46d38dae2 Mon Sep 17 00:00:00 2001 From: CFU-Playground-Bot Date: Mon, 20 Feb 2023 00:34:51 +0000 Subject: [PATCH] Sync from tflite-micro at 8889685. Signed-off-by: CFU-Playground-Bot --- conf/tflite-micro.version | 2 +- .../tensorflow/lite/c/builtin_op_data.h | 6 +- .../tensorflow/lite/c/c_api_types.h | 10 +- .../tflite-micro/tensorflow/lite/c/common.h | 10 +- .../tensorflow/lite/core/c/common.cc | 28 +- .../tensorflow/lite/core/c/common.h | 36 +- .../tensorflow/lite/kernels/internal/common.h | 181 +- .../internal/reference/integer_ops/conv.h | 15 - .../reference/integer_ops/depthwise_conv.h | 16 - .../reference/integer_ops/fully_connected.h | 138 +- .../internal/reference/integer_ops/mul.h | 10 +- .../lite/micro/examples/hello_world/BUILD | 28 + .../lite/micro/examples/hello_world/README.md | 23 + .../micro/examples/hello_world/evaluate.py | 131 ++ .../examples/hello_world/evaluate_test.py | 103 + .../examples/hello_world/hello_world.tflite | Bin 2312 -> 2864 bytes .../examples/hello_world/hello_world_test.cc | 35 +- .../hello_world/images/hello_world_tflite.png | Bin 0 -> 25264 bytes .../hello_world/images/hello_world_tflm.png | Bin 0 -> 25148 bytes .../examples/hello_world/main_functions.cc | 14 +- .../train/train_hello_world_model.ipynb | 2 - .../train/train_hello_world_model.py | 2 - .../micro/examples/micro_speech/Makefile.inc | 10 +- .../lite/micro/examples/mnist_lstm/BUILD | 2 +- .../examples/mnist_lstm/evaluate_test.py | 2 +- .../mnist_lstm/trained_lstm_int8.tflite | Bin 0 -> 13952 bytes .../examples/person_detection/Makefile.inc | 10 +- .../lite/micro/kernels/activations_common.cc | 4 +- .../tensorflow/lite/micro/kernels/ceil.cc | 14 +- .../lite/micro/kernels/comparisons.cc | 30 +- .../lite/micro/kernels/concatenation.cc | 14 +- .../tensorflow/lite/micro/kernels/conv.cc | 12 +- .../lite/micro/kernels/conv_test.cc | 6 + .../lite/micro/kernels/depthwise_conv.cc | 20 +- .../lite/micro/kernels/depthwise_conv_test.cc | 186 +- .../tensorflow/lite/micro/kernels/floor.cc | 14 +- .../lite/micro/kernels/fully_connected.cc | 20 +- .../lite/micro/kernels/fully_connected.h | 2 +- .../micro/kernels/fully_connected_common.cc | 3 +- .../lite/micro/kernels/gather_nd.cc | 7 + .../lite/micro/kernels/kernel_util.cc | 20 + .../lite/micro/kernels/kernel_util.h | 8 + .../lite/micro/kernels/leaky_relu_common.cc | 9 +- .../lite/micro/kernels/lstm_eval.cc | 1563 ++------------- .../tensorflow/lite/micro/kernels/lstm_eval.h | 674 ++++--- .../lite/micro/kernels/lstm_eval_test.h | 817 ++++++++ .../lite/micro/kernels/lstm_shared.h | 83 + .../lite/micro/kernels/maximum_minimum.cc | 24 +- .../tensorflow/lite/micro/kernels/micro_ops.h | 40 +- .../lite/micro/kernels/mul_common.cc | 16 +- .../tensorflow/lite/micro/kernels/pack.cc | 11 +- .../tensorflow/lite/micro/kernels/pooling.cc | 16 +- .../tensorflow/lite/micro/kernels/pooling.h | 61 +- .../lite/micro/kernels/pooling_common.cc | 54 +- .../tensorflow/lite/micro/kernels/split_v.cc | 13 +- .../lite/micro/kernels/strided_slice.cc | 15 +- .../lite/micro/kernels/sub_common.cc | 10 +- .../lite/micro/kernels/svdf_common.cc | 16 +- .../lite/micro/kernels/testdata/BUILD | 13 + .../micro/kernels/testdata/lstm_test_data.cc | 309 +++ .../micro/kernels/testdata/lstm_test_data.h | 579 ++++++ .../kernels/unidirectional_sequence_lstm.cc | 1768 +++++------------ .../lite/micro/micro_allocation_info.cc | 10 +- .../tensorflow/lite/micro/micro_allocator.cc | 50 +- .../tensorflow/lite/micro/micro_allocator.h | 10 +- 
.../lite/micro/micro_mutable_op_resolver.h | 48 +- .../tensorflow/lite/micro/micro_profiler.cc | 4 +- .../lite/micro/micro_resource_variable.cc | 18 +- .../lite/micro/micro_resource_variable.h | 2 + .../lite/micro/recording_micro_allocator.cc | 9 +- .../lite/micro/recording_micro_allocator.h | 7 +- .../tensorflow/lite/micro/tools/BUILD | 34 + .../tools/ci_build/test_xtensa_hifimini.sh | 50 + .../micro_mutable_op_resolver_test.cc.mako | 2 +- .../tensorflow/lite/micro/tools/make/Makefile | 4 +- .../person_detection_int8_vela_convert.sh | 2 + .../lite/micro/tools/make/ext_libs/xtensa.inc | 18 +- .../lite/micro/tools/requantize_flatbuffer.py | 204 ++ .../micro/tools/requantize_flatbuffer_test.py | 115 ++ .../tools/requantize_flatbuffer_utils.py | 219 ++ 80 files changed, 4450 insertions(+), 3621 deletions(-) create mode 100644 third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/evaluate.py create mode 100644 third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/evaluate_test.py create mode 100644 third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/images/hello_world_tflite.png create mode 100644 third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/images/hello_world_tflm.png create mode 100644 third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/trained_lstm_int8.tflite create mode 100644 third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval_test.h create mode 100644 third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.cc create mode 100644 third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.h create mode 100755 third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifimini.sh create mode 100644 third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer.py create mode 100644 third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer_test.py create mode 100644 third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer_utils.py diff --git a/conf/tflite-micro.version b/conf/tflite-micro.version index 9874f722c..30d854090 100644 --- a/conf/tflite-micro.version +++ b/conf/tflite-micro.version @@ -1 +1 @@ -8746ec9 +8889685 diff --git a/third_party/tflite-micro/tensorflow/lite/c/builtin_op_data.h b/third_party/tflite-micro/tensorflow/lite/c/builtin_op_data.h index b1981b3c5..7628e5ad1 100644 --- a/third_party/tflite-micro/tensorflow/lite/c/builtin_op_data.h +++ b/third_party/tflite-micro/tensorflow/lite/c/builtin_op_data.h @@ -1,4 +1,4 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,8 +15,6 @@ limitations under the License. #ifndef TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_ #define TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_ -/// For documentation, see -/// third_party/tensorflow/lite/core/c/builtin_op_data.h. 
-#include "tensorflow/lite/core/c/builtin_op_data.h" // IWYU pragma: export +#include "tensorflow/lite/core/c/builtin_op_data.h" #endif // TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_ diff --git a/third_party/tflite-micro/tensorflow/lite/c/c_api_types.h b/third_party/tflite-micro/tensorflow/lite/c/c_api_types.h index 18bccde66..cdbf1fd32 100644 --- a/third_party/tflite-micro/tensorflow/lite/c/c_api_types.h +++ b/third_party/tflite-micro/tensorflow/lite/c/c_api_types.h @@ -1,4 +1,4 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -12,15 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ - -// This file declares types used by the pure C inference API defined in c_api.h, -// some of which are also used in the C++ and C kernel and interpreter APIs. - #ifndef TENSORFLOW_LITE_C_C_API_TYPES_H_ #define TENSORFLOW_LITE_C_C_API_TYPES_H_ -/// For documentation, see -/// third_party/tensorflow/lite/core/c/c_api_types.h. -#include "tensorflow/lite/core/c/c_api_types.h" // IWYU pragma: export +#include "tensorflow/lite/core/c/c_api_types.h" #endif // TENSORFLOW_LITE_C_C_API_TYPES_H_ diff --git a/third_party/tflite-micro/tensorflow/lite/c/common.h b/third_party/tflite-micro/tensorflow/lite/c/common.h index 718650df8..0e4858124 100644 --- a/third_party/tflite-micro/tensorflow/lite/c/common.h +++ b/third_party/tflite-micro/tensorflow/lite/c/common.h @@ -36,8 +36,12 @@ limitations under the License. #ifndef TENSORFLOW_LITE_C_COMMON_H_ #define TENSORFLOW_LITE_C_COMMON_H_ -/// For documentation, see -/// third_party/tensorflow/lite/core/c/common.h. -#include "tensorflow/lite/core/c/common.h" // IWYU pragma: export +#include "tensorflow/lite/core/c/common.h" + +// TfLiteOpaqueDelegate: allows delegation of nodes to alternative backends. +// TfLiteOpaqueDelegate is an abstract type that is intended to have the same +// role as TfLiteDelegate, but without necessarily exposing the implementation +// details of how delegates are implemented. 
+typedef TfLiteDelegate TfLiteOpaqueDelegate;

 #endif  // TENSORFLOW_LITE_C_COMMON_H_
diff --git a/third_party/tflite-micro/tensorflow/lite/core/c/common.cc b/third_party/tflite-micro/tensorflow/lite/core/c/common.cc
index 827312b45..00bbcde28 100644
--- a/third_party/tflite-micro/tensorflow/lite/core/c/common.cc
+++ b/third_party/tflite-micro/tensorflow/lite/core/c/common.cc
@@ -219,11 +219,11 @@ TfLiteStatus TfLiteTensorCopy(const TfLiteTensor* src, TfLiteTensor* dst) {
   return kTfLiteOk;
 }

-void TfLiteTensorResizeMaybeCopy(size_t num_bytes, TfLiteTensor* tensor,
-                                 bool preserve_data) {
+TfLiteStatus TfLiteTensorResizeMaybeCopy(size_t num_bytes, TfLiteTensor* tensor,
+                                         bool preserve_data) {
   if (tensor->allocation_type != kTfLiteDynamic &&
       tensor->allocation_type != kTfLitePersistentRo) {
-    return;
+    return kTfLiteOk;
   }
 #ifdef TF_LITE_TENSORFLOW_PROFILER
   tflite::PauseHeapMonitoring(/*pause=*/true);
@@ -258,9 +258,15 @@ void TfLiteTensorResizeMaybeCopy(size_t num_bytes, TfLiteTensor* tensor,
   tflite::PauseHeapMonitoring(/*pause=*/false);
 #endif
   tensor->bytes = num_bytes;
+  if (tensor->data.data == nullptr && num_bytes != 0) {
+    // We are done allocating but the tensor is pointing to null and a valid
+    // size was requested, so we error.
+    return kTfLiteError;
+  }
+  return kTfLiteOk;
 }

-void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor) {
+TfLiteStatus TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor) {
   return TfLiteTensorResizeMaybeCopy(num_bytes, tensor, true);
 }
 #endif  // TF_LITE_STATIC_MEMORY
@@ -331,4 +337,18 @@ void TfLiteOpaqueDelegateDelete(TfLiteOpaqueDelegate* opaque_delegate) {
   delete tflite_delegate;
 }

+void* TfLiteOpaqueDelegateGetData(const TfLiteOpaqueDelegate* delegate) {
+  if (!delegate) return nullptr;
+
+  // The following cast is safe only because this code is part of the
+  // TF Lite runtime implementation. Apps using TF Lite should not rely on
+  // 'TfLiteOpaqueDelegate' and 'TfLiteDelegate' being equivalent.
+  const auto* tflite_delegate =
+      reinterpret_cast<const TfLiteDelegate*>(delegate);
+
+  if (!tflite_delegate->opaque_delegate_builder) return tflite_delegate->data_;
+
+  return tflite_delegate->opaque_delegate_builder->data;
+}
+
 }  // extern "C"
diff --git a/third_party/tflite-micro/tensorflow/lite/core/c/common.h b/third_party/tflite-micro/tensorflow/lite/core/c/common.h
index 46d5e650a..36bb01a96 100644
--- a/third_party/tflite-micro/tensorflow/lite/core/c/common.h
+++ b/third_party/tflite-micro/tensorflow/lite/core/c/common.h
@@ -42,6 +42,7 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_CORE_C_COMMON_H_
 #define TENSORFLOW_LITE_CORE_C_COMMON_H_

+#include <stdarg.h>
 #include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>
@@ -648,23 +649,26 @@ void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims,
 TfLiteStatus TfLiteTensorCopy(const TfLiteTensor* src, TfLiteTensor* dst);

 // Change the size of the memory block owned by `tensor` to `num_bytes`.
-// Tensors with allocation types other than kTfLiteDynamic will be ignored.
+// Tensors with allocation types other than `kTfLiteDynamic` will be ignored
+// and `kTfLiteOk` will be returned.
 // `tensor`'s internal data buffer will be assigned a pointer
 // which can safely be passed to free or realloc if `num_bytes` is zero.
-// Behaviour is undefined if `tensor` is NULL.
 // If `preserve_data` is true, tensor data will be unchanged in the range from
-// the start of the region up to the minimum of the old and new sizes.
-void TfLiteTensorResizeMaybeCopy(size_t num_bytes, TfLiteTensor* tensor,
-                                 bool preserve_data);
+// the start of the region up to the minimum of the old and new sizes. In the
+// case of a NULL tensor, or an error allocating new memory, returns
+// `kTfLiteError`.
+TfLiteStatus TfLiteTensorResizeMaybeCopy(size_t num_bytes, TfLiteTensor* tensor,
+                                         bool preserve_data);

 // Change the size of the memory block owned by `tensor` to `num_bytes`.
-// Tensors with allocation types other than kTfLiteDynamic will be ignored.
+// Tensors with allocation types other than `kTfLiteDynamic` will be ignored
+// and `kTfLiteOk` will be returned.
 // `tensor`'s internal data buffer will be assigned a pointer
 // which can safely be passed to free or realloc if `num_bytes` is zero.
-// Behaviour is undefined if `tensor` is NULL.
 // Tensor data will be unchanged in the range from the start of the region up to
-// the minimum of the old and new sizes.
-void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor);
+// the minimum of the old and new sizes. In the case of a NULL tensor, or an
+// error allocating new memory, returns `kTfLiteError`.
+TfLiteStatus TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor);
 #endif  // TF_LITE_STATIC_MEMORY

 // WARNING: This is an experimental interface that is subject to change.
@@ -1135,6 +1139,20 @@ TfLiteOpaqueDelegate* TfLiteOpaqueDelegateCreate(
 // 'delegate' is a null pointer.
 void TfLiteOpaqueDelegateDelete(TfLiteOpaqueDelegate* delegate);

+// Returns a pointer to the data associated with the provided opaque
+// 'delegate'.
+//
+// A null pointer will be returned when:
+// - The 'delegate' is null.
+// - The 'delegate' has been constructed via a 'TfLiteOpaqueDelegateBuilder',
+//   but the 'data' field of the 'TfLiteOpaqueDelegateBuilder' is null.
+// - Or in case of any other error.
+//
+// The 'data_' field of 'delegate' will be returned if the
+// 'opaque_delegate_builder' field is null.
+void* TfLiteOpaqueDelegateGetData(const TfLiteOpaqueDelegate* delegate);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif  // __cplusplus
diff --git a/third_party/tflite-micro/tensorflow/lite/kernels/internal/common.h b/third_party/tflite-micro/tensorflow/lite/kernels/internal/common.h
index c641bc94c..65e248de8 100644
--- a/third_party/tflite-micro/tensorflow/lite/kernels/internal/common.h
+++ b/third_party/tflite-micro/tensorflow/lite/kernels/internal/common.h
@@ -377,40 +377,49 @@ inline Integer FloorLog2(Integer n) {
   }
 }

-// The size of the LUT depends on the type of input. For uint8 and int8 inputs
-// we use a 256 entries LUT to map all the values in the (u)int8 range. For
-// int16 inputs the high 9 bits are used for indexing and the 7 remaining bits
-// are used for interpolation. We thus use a 513-entries LUT for int16 cases,
-// 512 for the 9-bit indexing and 1 extra entry to interpolate the last value.
-template <typename T>
-constexpr int LUTSize() {
-  static_assert(std::is_same<T, uint8_t>::value ||
-                    std::is_same<T, int8_t>::value ||
-                    std::is_same<T, int16_t>::value,
-                "Only LUTs with uint8, int8 or int16 inputs are supported.");
-  // As per c++11: constexpr methods cannot have more than one return statement.
-  return (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value)
-             ? 256
-             : 513;
+namespace detail {
+
+// LUTPopulate takes an optional type-erased transform_params to allow passing
+// extra parameters to the transform function pointer. const void* is used
+// instead of std::function to be compatible with TFLite Micro.
+template <typename FloatT, typename Func>
+inline typename std::enable_if<std::is_same<Func, FloatT (*)(FloatT)>::value,
+                               FloatT>::type
+LUTTransform(Func transform, const void* /*transform_params*/, FloatT value) {
+  static_assert(std::is_floating_point<FloatT>::value,
+                "FloatT must be a floating-point type.");
+  return transform(value);
+}
+
+template <typename FloatT, typename Func>
+inline typename std::enable_if<
+    std::is_same<Func, FloatT (*)(FloatT, const void*)>::value, FloatT>::type
+LUTTransform(Func transform, const void* transform_params, FloatT value) {
+  static_assert(std::is_floating_point<FloatT>::value,
+                "FloatT must be a floating-point type.");
+  return transform(value, transform_params);
 }

 // Use the same LUT generation code for both uint8_t and int8_t. Int8_t indexes
 // will be directly casted to uint8_t, the int8 LUT will thus be ordered as [0,
 // 1, ..., 127, -128, ..., -2, -1] instead of [-128, -127, ..., -1, 0, 1, ...,
 // 126, 127].
-template <typename T>
-inline typename std::enable_if<std::is_same<T, uint8_t>::value ||
-                                   std::is_same<T, int8_t>::value,
-                               void>::type
-LUTPopulate(float input_scale, int32_t input_zero_point, float output_scale,
-            int32_t output_zero_point, float (*transform)(float), T* lut) {
+template <typename T, typename Func>
+inline void LUTPopulateInt8(float input_scale, int32_t input_zero_point,
+                            float output_scale, int32_t output_zero_point,
+                            Func transform, const void* transform_params,
+                            T* lut) {
+  static_assert(
+      std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value,
+      "T must be an uint8 or int8 type.");
   uint8_t* lut_uint8 = reinterpret_cast<uint8_t*>(lut);
   const float inverse_scale = 1 / output_scale;
   int32_t maxval = std::numeric_limits<T>::max();
   int32_t minval = std::numeric_limits<T>::min();
   for (int32_t val = minval; val <= maxval; ++val) {
     const float dequantized = input_scale * (val - input_zero_point);
-    const float transformed = transform(dequantized);
+    const float transformed =
+        LUTTransform(transform, transform_params, dequantized);
     const float rescaled = TfLiteRound(transformed * inverse_scale);
     const int32_t quantized =
         static_cast<int32_t>(rescaled + output_zero_point);
@@ -421,10 +430,11 @@ LUTPopulate(float input_scale, int32_t input_zero_point, float output_scale,

 // Keep floating-point type configurable for backward compatibility. float
 // should be used for FloatT by default.
-template <typename FloatT, typename T>
-inline typename std::enable_if<std::is_same<T, int16_t>::value, void>::type
-LUTPopulate(FloatT input_scale, int32_t input_zero_point, FloatT output_scale,
-            int32_t output_zero_point, FloatT (*transform)(FloatT), T* lut) {
+template <typename FloatT, typename Func>
+inline void LUTPopulateInt16(FloatT input_scale, int32_t input_zero_point,
+                             FloatT output_scale, int32_t output_zero_point,
+                             Func transform, const void* transform_params,
+                             int16_t* lut) {
   static_assert(std::is_floating_point<FloatT>::value,
                 "FloatT must be a floating-point type.");
   const FloatT input_min =
@@ -440,16 +450,21 @@ LUTPopulate(FloatT input_scale, int32_t input_zero_point, FloatT output_scale,
   const FloatT step = (input_max - input_min) / nb_steps;
   const FloatT half_step = step / 2;
   const FloatT output_scaling_inv =
-      static_cast<FloatT>(std::numeric_limits<T>::max() -
-                          std::numeric_limits<T>::min() + 1) /
+      static_cast<FloatT>(std::numeric_limits<int16_t>::max() -
+                          std::numeric_limits<int16_t>::min() + 1) /
       (output_max - output_min);
-  const FloatT table_min = static_cast<FloatT>(std::numeric_limits<T>::min());
-  const FloatT table_max = static_cast<FloatT>(std::numeric_limits<T>::max());
+  const FloatT table_min =
+      static_cast<FloatT>(std::numeric_limits<int16_t>::min());
+  const FloatT table_max =
+      static_cast<FloatT>(std::numeric_limits<int16_t>::max());

   for (int i = 0; i < nb_steps; i++) {
-    const FloatT val = transform(input_min + i * step);
-    const FloatT val_midpoint = transform(input_min + i * step + half_step);
-    const FloatT val_next = transform(input_min + (i + 1) * step);
+    const FloatT val =
+        LUTTransform(transform, transform_params, input_min + i * step);
+    const FloatT val_midpoint = LUTTransform(
+        transform, transform_params, input_min + i * step + half_step);
+    const FloatT val_next = LUTTransform(transform, transform_params,
+                                         input_min + (i + 1) * step);

     const FloatT sample_val = TfLiteRound(val * output_scaling_inv);
     const FloatT midpoint_interp_val =
@@ -460,54 +475,84 @@ LUTPopulate(FloatT input_scale, int32_t input_zero_point, FloatT output_scale,
     const FloatT midpoint_err = midpoint_interp_val - midpoint_val;
     const FloatT bias = TfLiteRound(midpoint_err / 2);

-    lut[i] = static_cast<T>(std::min(
+    lut[i] = static_cast<int16_t>(std::min(
         std::max(sample_val - bias, table_min), table_max));
   }

-  lut[nb_steps] = static_cast<T>(std::min(
-      std::max(TfLiteRound(transform(input_max) * output_scaling_inv),
+  lut[nb_steps] = static_cast<int16_t>(std::min(
+      std::max(TfLiteRound(LUTTransform(
+                   transform, transform_params, input_max) *
+               output_scaling_inv),
                table_min),
       table_max));
 }

+}  // namespace detail
+
+template <typename T>
+inline typename std::enable_if<std::is_same<T, uint8_t>::value ||
+                                   std::is_same<T, int8_t>::value,
+                               void>::type
+LUTPopulate(float input_scale, int32_t input_zero_point, float output_scale,
+            int32_t output_zero_point, float (*transform)(float), T* lut) {
+  detail::LUTPopulateInt8(input_scale, input_zero_point, output_scale,
+                          output_zero_point, transform, nullptr, lut);
+}
+
+template <typename T>
+inline typename std::enable_if<std::is_same<T, uint8_t>::value ||
+                                   std::is_same<T, int8_t>::value,
+                               void>::type
+LUTPopulate(float input_scale, int32_t input_zero_point, float output_scale,
+            int32_t output_zero_point, float (*transform)(float, const void*),
+            const void* transform_params, T* lut) {
+  detail::LUTPopulateInt8(input_scale, input_zero_point, output_scale,
+                          output_zero_point, transform, transform_params, lut);
+}
+
 template <typename T>
 inline typename std::enable_if<std::is_same<T, int16_t>::value, void>::type
 LUTPopulate(float input_scale, int32_t input_zero_point, float output_scale,
             int32_t output_zero_point, float (*transform)(float), T* lut) {
-  LUTPopulate<float>(input_scale, input_zero_point, output_scale,
-                     output_zero_point, transform, lut);
+  detail::LUTPopulateInt16(input_scale, input_zero_point, output_scale,
+                           output_zero_point, transform, nullptr, lut);
+}
+
+template <typename T>
+inline typename std::enable_if<std::is_same<T, int16_t>::value, void>::type
+LUTPopulate(float input_scale, int32_t input_zero_point, float output_scale,
+            int32_t output_zero_point, float (*transform)(float, const void*),
+            const void* transform_params, T* lut) {
+  detail::LUTPopulateInt16(input_scale, input_zero_point, output_scale,
+                           output_zero_point, transform,
+                           transform_params, lut);
 }

-// Deprecated and will be removed in future, please use LUTPopulate instead
-template <typename FloatT, typename LutInT, typename LutOutT>
-inline void gen_lut(FloatT (*func)(FloatT), FloatT input_min, FloatT input_max,
-                    FloatT output_min, FloatT output_max, LutOutT* lut) {
-  static_assert(std::is_same<LutInT, LutOutT>::value,
-                "Input and output type of the LUT must be the same.");
-  static_assert(std::is_same<LutInT, int16_t>::value,
-                "Only int16_t type LUT are supported.");
-  static_assert(std::is_same<FloatT, float>::value,
-                "Only float type is supported for FloatT.");
-  using T = LutInT;
-
-  const auto zero_point = [](float min, float max, float scale) {
-    // Symmetric int16 LUT, we know the zero-point will not overflow an int32_t
-    // and zero-point from min will be the same as from max.
-    return static_cast<int32_t>(
-        static_cast<float>(std::numeric_limits<T>::min()) - min / scale);
-  };
-
-  const float scale = static_cast<float>(std::numeric_limits<T>::max() -
-                                         std::numeric_limits<T>::min());
-  const float input_scale = (input_max - input_min) / scale;
-  const FloatT output_scale = (output_max - output_min) / scale;
-  const int32_t input_zero_point =
-      zero_point(input_min, input_max, input_scale);
-  const int32_t output_zero_point =
-      zero_point(output_min, output_max, output_scale);
-
-  return LUTPopulate<FloatT>(input_scale, input_zero_point, output_scale,
-                             output_zero_point, func, lut);
+// Deprecated, avoid usage and prefer the float version. Kept for
+// backward compatibility.
+template <typename T>
+inline typename std::enable_if<std::is_same<T, int16_t>::value, void>::type
+LUTPopulate(double input_scale, int32_t input_zero_point, double output_scale,
+            int32_t output_zero_point, double (*transform)(double), T* lut) {
+  detail::LUTPopulateInt16(input_scale, input_zero_point, output_scale,
+                           output_zero_point, transform, nullptr, lut);
+}
+
+// The size of the LUT depends on the type of input. For uint8 and int8 inputs
+// a simple 256-entry LUT is used. For int16 inputs the high 9 bits are used
+// for indexing and the 7 remaining bits are used for interpolation. We thus
+// use a 513-entry LUT for int16 cases, 512 for the 9-bit indexing and 1 extra
+// entry to interpolate the last value.
+template <typename T>
+constexpr int LUTSize() {
+  static_assert(std::is_same<T, uint8_t>::value ||
+                    std::is_same<T, int8_t>::value ||
+                    std::is_same<T, int16_t>::value,
+                "Only LUTs with uint8, int8 or int16 inputs are supported.");
+  // As per C++11: constexpr methods cannot have more than one return statement.
+  return (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value)
+             ? 256
+             : 513;
 }

 // int16_t -> int16_t table lookup with interpolation
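To make the reworked LUT entry points above concrete, here is a minimal usage sketch (an editor's illustration, not part of the patch): `LUTSize<T>()` sizes the table, and the two `LUTPopulate` overload families cover parameterless transforms and transforms that take a type-erased `const void*` parameter. The `Sigmoid`/`ScaledTanh` helpers and all quantization parameters below are assumptions made up for the example.

```cpp
#include <cmath>
#include <cstdint>

#include "tensorflow/lite/kernels/internal/common.h"

namespace {

// Hypothetical transforms for illustration only.
float Sigmoid(float x) { return 1.f / (1.f + std::exp(-x)); }

// A transform that needs an extra parameter, passed type-erased through the
// new const void* overloads.
float ScaledTanh(float x, const void* params) {
  const float beta = *static_cast<const float*>(params);
  return std::tanh(beta * x);
}

}  // namespace

void BuildTables() {
  // 256-entry int8 table, parameterless transform.
  int8_t sigmoid_lut[tflite::LUTSize<int8_t>()];
  tflite::LUTPopulate<int8_t>(/*input_scale=*/0.1f, /*input_zero_point=*/0,
                              /*output_scale=*/1.f / 256.f,
                              /*output_zero_point=*/-128, Sigmoid, sigmoid_lut);

  // 513-entry int16 table, transform with a type-erased parameter.
  const float beta = 1.5f;
  int16_t tanh_lut[tflite::LUTSize<int16_t>()];
  tflite::LUTPopulate<int16_t>(/*input_scale=*/1.f / 4096.f, 0,
                               /*output_scale=*/1.f / 32768.f, 0, ScaledTanh,
                               &beta, tanh_lut);
}
```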
#include #include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" namespace tflite { namespace reference_integer_ops { @@ -134,20 +133,6 @@ inline void ConvPerChannel( } } -inline void ConvPerChannelWithPackedInt4Weights( - const ConvParams& params, const int32_t* output_multiplier, - const int32_t* output_shift, const RuntimeShape& input_shape, - const int8_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_input, int8_t* unpacked_filter_data, - const RuntimeShape& bias_shape, const int32_t* bias_data, - const RuntimeShape& output_shape, int8_t* output_data) { - TFLITE_DCHECK(unpacked_filter_data != nullptr); - tflite::tensor_utils::UnpackDenseInt4IntoInt8( - filter_input, filter_shape.FlatSize(), unpacked_filter_data); - ConvPerChannel(params, output_multiplier, output_shift, input_shape, - input_data, filter_shape, unpacked_filter_data, bias_shape, - bias_data, output_shape, output_data); -} // Fixed-point per-channel-quantization convolution reference kernel. // 16-bit data and 8-bit filter diff --git a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h index 312ba0f93..7676fce0f 100644 --- a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h +++ b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h @@ -18,7 +18,6 @@ limitations under the License. #include #include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" namespace tflite { namespace reference_integer_ops { @@ -122,21 +121,6 @@ inline void DepthwiseConvPerChannel( } } -inline void DepthwiseConvPerChannelWithPackedInt4Weights( - const DepthwiseParams& params, const int32_t* output_multiplier, - const int32_t* output_shift, const RuntimeShape& input_shape, - const int8_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, int8_t* unpacked_filter_data, - const RuntimeShape& bias_shape, const int32_t* bias_data, - const RuntimeShape& output_shape, int8_t* output_data) { - TFLITE_DCHECK_NE(unpacked_filter_data, nullptr); - tflite::tensor_utils::UnpackDenseInt4IntoInt8( - filter_data, filter_shape.FlatSize(), unpacked_filter_data); - DepthwiseConvPerChannel(params, output_multiplier, output_shift, input_shape, - input_data, filter_shape, unpacked_filter_data, - bias_shape, bias_data, output_shape, output_data); -} - inline void DepthwiseConvPerChannel( const DepthwiseParams& params, const int32_t* output_multiplier, const int32_t* output_shift, const RuntimeShape& input_shape, diff --git a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h index 77c766d25..3a74402ed 100644 --- a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h +++ b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h @@ -18,7 +18,6 @@ limitations under the License. 
#include #include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" namespace tflite { namespace reference_integer_ops { @@ -29,14 +28,15 @@ namespace reference_integer_ops { // zero_point (params.weights_offset) is always 0. // However, for per-tensor functions, params.weights_offset is still applied for // backward compatibility. - -inline void FullyConnectedPerChannel( +template +void FullyConnectedPerChannel( const FullyConnectedParams& params, const int32_t* output_multiplier, const int* output_shift, const RuntimeShape& input_shape, - const int8_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, const RuntimeShape& bias_shape, - const int32_t* bias_data, const RuntimeShape& output_shape, - int8_t* output_data) { + const InputType* input_data, const RuntimeShape& filter_shape, + const WeightType* filter_data, const RuntimeShape& bias_shape, + const BiasType* bias_data, const RuntimeShape& output_shape, + OutputType* output_data) { const int32_t input_offset = params.input_offset; const int32_t output_offset = params.output_offset; const int32_t output_activation_min = params.quantized_activation_min; @@ -52,7 +52,7 @@ inline void FullyConnectedPerChannel( const int accum_depth = filter_shape.Dims(filter_dim_count - 1); for (int b = 0; b < batches; ++b) { for (int out_c = 0; out_c < output_depth; ++out_c) { - int32_t acc = 0; + BiasType acc = 0; for (int d = 0; d < accum_depth; ++d) { int32_t input_val = input_data[b * accum_depth + d]; int32_t filter_val = filter_data[out_c * accum_depth + d]; @@ -61,62 +61,26 @@ inline void FullyConnectedPerChannel( if (bias_data) { acc += bias_data[out_c]; } - acc = MultiplyByQuantizedMultiplier(acc, output_multiplier[out_c], - output_shift[out_c]); - acc += output_offset; - acc = std::max(acc, output_activation_min); - acc = std::min(acc, output_activation_max); - output_data[out_c + output_depth * b] = static_cast(acc); - } - } -} - -template -inline void FullyConnectedPerChannel( - const FullyConnectedParams& params, const int32_t* output_multiplier, - const int* output_shift, const RuntimeShape& input_shape, - const int16_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, const RuntimeShape& bias_shape, - const AccumScalar* bias_data, const RuntimeShape& output_shape, - int16_t* output_data) { - const int32_t output_activation_min = params.quantized_activation_min; - const int32_t output_activation_max = params.quantized_activation_max; - TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2); - TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1); - - TFLITE_DCHECK_LE(output_activation_min, output_activation_max); - const int filter_dim_count = filter_shape.DimensionsCount(); - const int output_dim_count = output_shape.DimensionsCount(); - const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1); - const int output_depth = output_shape.Dims(output_dim_count - 1); - TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2)); - const int accum_depth = filter_shape.Dims(filter_dim_count - 1); - for (int b = 0; b < batches; ++b) { - for (int out_c = 0; out_c < output_depth; ++out_c) { - AccumScalar acc = 0; - for (int d = 0; d < accum_depth; ++d) { - int32_t input_val = input_data[b * accum_depth + d]; - int32_t filter_val = filter_data[out_c * accum_depth + d]; - acc += filter_val * input_val; - } - if (bias_data) { - acc += bias_data[out_c]; - } int32_t acc_scaled = MultiplyByQuantizedMultiplier( acc, 
output_multiplier[out_c], output_shift[out_c]); + acc_scaled += output_offset; acc_scaled = std::max(acc_scaled, output_activation_min); acc_scaled = std::min(acc_scaled, output_activation_max); - output_data[out_c + output_depth * b] = static_cast(acc_scaled); + output_data[out_c + output_depth * b] = + static_cast(acc_scaled); } } } -inline void FullyConnected( - const FullyConnectedParams& params, const RuntimeShape& input_shape, - const int8_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, const RuntimeShape& bias_shape, - const int32_t* bias_data, const RuntimeShape& output_shape, - int8_t* output_data) { +template +void FullyConnected(const FullyConnectedParams& params, + const RuntimeShape& input_shape, + const InputType* input_data, + const RuntimeShape& filter_shape, + const WeightType* filter_data, + const RuntimeShape& bias_shape, const BiasType* bias_data, + const RuntimeShape& output_shape, OutputType* output_data) { const int32_t input_offset = params.input_offset; const int32_t filter_offset = params.weights_offset; const int32_t output_offset = params.output_offset; @@ -136,7 +100,7 @@ inline void FullyConnected( const int accum_depth = filter_shape.Dims(filter_dim_count - 1); for (int b = 0; b < batches; ++b) { for (int out_c = 0; out_c < output_depth; ++out_c) { - int32_t acc = 0; + BiasType acc = 0; for (int d = 0; d < accum_depth; ++d) { int32_t input_val = input_data[b * accum_depth + d]; int32_t filter_val = filter_data[out_c * accum_depth + d]; @@ -145,67 +109,13 @@ inline void FullyConnected( if (bias_data) { acc += bias_data[out_c]; } - acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); - acc += output_offset; - acc = std::max(acc, output_activation_min); - acc = std::min(acc, output_activation_max); - output_data[out_c + output_depth * b] = static_cast(acc); - } - } -} - -inline void FullyConnectedWithPackedInt4Weights( - const FullyConnectedParams& params, const RuntimeShape& input_shape, - const int8_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, int8_t* unpacked_filter_data, - const RuntimeShape& bias_shape, const int32_t* bias_data, - const RuntimeShape& output_shape, int8_t* output_data) { - TFLITE_DCHECK_NE(unpacked_filter_data, nullptr); - tflite::tensor_utils::UnpackDenseInt4IntoInt8( - filter_data, filter_shape.FlatSize(), unpacked_filter_data); - FullyConnected(params, input_shape, input_data, filter_shape, - unpacked_filter_data, bias_shape, bias_data, output_shape, - output_data); -} - -template -inline void FullyConnected( - const FullyConnectedParams& params, const RuntimeShape& input_shape, - const int16_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, const RuntimeShape& bias_shape, - const AccumScalar* bias_data, const RuntimeShape& output_shape, - int16_t* output_data) { - const int32_t filter_offset = params.weights_offset; - const int32_t output_multiplier = params.output_multiplier; - const int output_shift = params.output_shift; - const int32_t output_activation_min = params.quantized_activation_min; - const int32_t output_activation_max = params.quantized_activation_max; - TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2); - TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1); - - TFLITE_DCHECK_LE(output_activation_min, output_activation_max); - const int filter_dim_count = filter_shape.DimensionsCount(); - const int output_dim_count = output_shape.DimensionsCount(); - const int batches = FlatSizeSkipDim(output_shape, 
output_dim_count - 1); - const int output_depth = output_shape.Dims(output_dim_count - 1); - TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2)); - const int accum_depth = filter_shape.Dims(filter_dim_count - 1); - for (int b = 0; b < batches; ++b) { - for (int out_c = 0; out_c < output_depth; ++out_c) { - AccumScalar acc = 0; - for (int d = 0; d < accum_depth; ++d) { - int32_t input_val = input_data[b * accum_depth + d]; - int32_t filter_val = filter_data[out_c * accum_depth + d]; - acc += (filter_val + filter_offset) * input_val; - } - if (bias_data) { - acc += bias_data[out_c]; - } int32_t acc_scaled = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); + acc_scaled += output_offset; acc_scaled = std::max(acc_scaled, output_activation_min); acc_scaled = std::min(acc_scaled, output_activation_max); - output_data[out_c + output_depth * b] = static_cast(acc_scaled); + output_data[out_c + output_depth * b] = + static_cast(acc_scaled); } } } diff --git a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h index 22e897409..05066184c 100644 --- a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h +++ b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h @@ -24,10 +24,10 @@ limitations under the License. namespace tflite { namespace reference_integer_ops { -template -inline void MulElementwise(int size, const ArithmeticParams& params, - const T* input1_data, const T* input2_data, - T* output_data) { +template +void MulElementwise(int size, const ArithmeticParams& params, + const InputType* input1_data, const InputType* input2_data, + OutputType* output_data) { for (int i = 0; i < size; ++i) { const int32_t input1_val = params.input1_offset + input1_data[i]; const int32_t input2_val = params.input2_offset + input2_data[i]; @@ -39,7 +39,7 @@ inline void MulElementwise(int size, const ArithmeticParams& params, const int32_t clamped_output = std::min(params.quantized_activation_max, std::max(params.quantized_activation_min, unclamped_result)); - output_data[i] = static_cast(clamped_output); + output_data[i] = static_cast(clamped_output); } } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/BUILD b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/BUILD index e2ed47685..f5bba89d1 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/BUILD +++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/BUILD @@ -1,5 +1,6 @@ # Description: # TensorFlow Lite for Microcontrollers "hello world" example. 
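With the reference kernels above collapsed into templates, the former int16 overloads become plain instantiations. A small sketch (an editor's illustration, not part of the patch; the wrapper name and parameter set are assumptions) showing the int16-activation, int64-bias case:

```cpp
#include <cstdint>

#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"

// Hypothetical wrapper demonstrating template deduction for the new kernel.
void RunInt16FullyConnected(
    const tflite::FullyConnectedParams& params,
    const tflite::RuntimeShape& input_shape, const int16_t* input,
    const tflite::RuntimeShape& filter_shape, const int8_t* filter,
    const tflite::RuntimeShape& bias_shape, const int64_t* bias,
    const tflite::RuntimeShape& output_shape, int16_t* output) {
  // Deduces InputType=int16_t, WeightType=int8_t, BiasType=int64_t,
  // OutputType=int16_t. The accumulator uses BiasType, so products are summed
  // in 64 bits before being rescaled and clamped back to int16.
  tflite::reference_integer_ops::FullyConnected(
      params, input_shape, input, filter_shape, filter, bias_shape, bias,
      output_shape, output);
}
```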
+load("@tflm_pip_deps//:requirements.bzl", "requirement") load( "//tensorflow/lite/micro:build_def.bzl", "generate_cc_arrays", @@ -105,3 +106,30 @@ sh_test( srcs = ["hello_world_binary_test.sh"], data = [":hello_world"], ) + +py_binary( + name = "evaluate", + srcs = ["evaluate.py"], + data = ["hello_world.tflite"], + python_version = "PY3", + srcs_version = "PY3", + deps = [ + "@absl_py//absl:app", + "@absl_py//absl/flags", + "@absl_py//absl/logging", + requirement("numpy"), + requirement("tensorflow-cpu"), + "//tensorflow/lite/micro/python/interpreter/src:tflm_runtime", + ], +) + +py_binary( + name = "evaluate_test", + srcs = ["evaluate_test.py"], + data = ["hello_world.tflite"], + python_version = "PY3", + srcs_version = "PY3", + deps = [ + ":evaluate", + ], +) diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/README.md b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/README.md index b5bb00ff2..2f1320897 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/README.md +++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/README.md @@ -10,9 +10,32 @@ microcontroller. ## Table of contents +- [Run the evaluate.py script on a development machine](#run-the-evaluate-script-on-a-development-machine) - [Run the tests on a development machine](#run-the-tests-on-a-development-machine) - [Train your own model](#train-your-own-model) +## Run the evaluate.py script on a development machine +The evaluate.py script runs the hello_world.tflite model with x_values in the +range of [0, 2*PI]. The script plots a diagram of the predicted value of sinwave +using TFLM interpreter and compare that prediction with the actual value +generated by the numpy lib. +```bash +bazel build :evaluate +bazel run :evaluate +bazel run :evaluate -- --use_tflite +``` +![TFLM hello_world sinwave prediction VS actual values](images/hello_world_tflm.png) ![TFLM hello_world sinwave prediction VS actual values](images/hello_world_tflite.png) + +## Run the evaluate_test.py script on a development machine +These tests verify the input/output as well as the prediction of the +hello_world.tflite model. There is a test to also verify the correctness of +the model by running both TFLM and TFlite interpreter and then comparing the +prediction from both interpreters. +```bash +bazel build :evaluate_test +bazel run :evaluate_test +``` + ## Run the tests on a development machine ```bash diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/evaluate.py b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/evaluate.py new file mode 100644 index 000000000..773ed11cd --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/evaluate.py @@ -0,0 +1,131 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import os
+import tensorflow as tf
+from absl import app
+from absl import flags
+import numpy as np
+import matplotlib.pyplot as plt
+from tensorflow.python.platform import resource_loader
+from tflite_micro.tensorflow.lite.micro.python.interpreter.src import tflm_runtime
+
+_USE_TFLITE_INTERPRETER = flags.DEFINE_bool(
+    'use_tflite',
+    False,
+    'Inference with the TF Lite interpreter instead of the TFLM interpreter',
+)
+
+_PREFIX_PATH = resource_loader.get_path_to_datafile('')
+
+
+def invoke_tflm_interpreter(input_shape, interpreter, x_value, input_index,
+                            output_index):
+  input_data = np.reshape(x_value, input_shape)
+  interpreter.set_input(input_data, input_index)
+  interpreter.invoke()
+  y_quantized = np.reshape(interpreter.get_output(output_index), -1)[0]
+  return y_quantized
+
+
+def invoke_tflite_interpreter(input_shape, interpreter, x_value, input_index,
+                              output_index):
+  input_data = np.reshape(x_value, input_shape)
+  interpreter.set_tensor(input_index, input_data)
+  interpreter.invoke()
+  tflite_output = interpreter.get_tensor(output_index)
+  y_quantized = np.reshape(tflite_output, -1)[0]
+  return y_quantized
+
+
+# Generate a list of 1000 random floats in the range of 0 to 2*pi.
+def generate_random_input(sample_count=1000):
+  # Generate a uniformly distributed set of random numbers in the range from
+  # 0 to 2*pi, which covers a complete sine wave oscillation
+  x_values = np.random.uniform(low=0, high=2 * np.pi,
+                               size=sample_count).astype(np.float32)
+  # Shuffle the values to guarantee they're not in order
+  np.random.shuffle(x_values)
+  return x_values
+
+
+# Invoke the TFLM interpreter with x_values in the range of [0, 2*PI] and
+# return the interpreter's predictions.
+def get_tflm_prediction(model_path, x_values):
+  # Create the TFLM interpreter
+  tflm_interpreter = tflm_runtime.Interpreter.from_file(model_path)
+
+  input_shape = np.array(tflm_interpreter.get_input_details(0).get('shape'))
+
+  y_predictions = np.empty(x_values.size, dtype=np.float32)
+
+  for i, x_value in enumerate(x_values):
+    y_predictions[i] = invoke_tflm_interpreter(input_shape,
+                                               tflm_interpreter,
+                                               x_value,
+                                               input_index=0,
+                                               output_index=0)
+  return y_predictions
+
+
+# Invoke the TFLite interpreter with x_values in the range of [0, 2*PI] and
+# return the interpreter's predictions.
+def get_tflite_prediction(model_path, x_values):
+  # TFLite interpreter
+  tflite_interpreter = tf.lite.Interpreter(
+      model_path=model_path,
+      experimental_op_resolver_type=tf.lite.experimental.OpResolverType.
+      BUILTIN_REF,
+  )
+  tflite_interpreter.allocate_tensors()
+
+  input_details = tflite_interpreter.get_input_details()[0]
+  output_details = tflite_interpreter.get_output_details()[0]
+  input_shape = np.array(input_details.get('shape'))
+
+  y_predictions = np.empty(x_values.size, dtype=np.float32)
+
+  for i, x_value in enumerate(x_values):
+    y_predictions[i] = invoke_tflite_interpreter(
+        input_shape,
+        tflite_interpreter,
+        x_value,
+        input_details['index'],
+        output_details['index'],
+    )
+  return y_predictions
+
+
+def main(_):
+  model_path = os.path.join(_PREFIX_PATH, 'hello_world.tflite')
+
+  x_values = generate_random_input()
+
+  # Calculate the corresponding sine values
+  y_true_values = np.sin(x_values).astype(np.float32)
+
+  if _USE_TFLITE_INTERPRETER.value:
+    y_predictions = get_tflite_prediction(model_path, x_values)
+    plt.plot(x_values, y_predictions, 'b.', label='TFLite Prediction')
+  else:
+    y_predictions = get_tflm_prediction(model_path, x_values)
+    plt.plot(x_values, y_predictions, 'b.', label='TFLM Prediction')
+
+  plt.plot(x_values, y_true_values, 'r.', label='Actual values')
+  plt.legend()
+  plt.show()
+
+
+if __name__ == '__main__':
+  app.run(main)
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/evaluate_test.py b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/evaluate_test.py
new file mode 100644
index 000000000..c17aee4fe
--- /dev/null
+++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/evaluate_test.py
@@ -0,0 +1,103 @@
+# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import numpy as np
+
+from tensorflow.python.framework import test_util
+from tensorflow.python.platform import resource_loader
+from tensorflow.python.platform import test
+from tflite_micro.tensorflow.lite.micro.python.interpreter.src import tflm_runtime
+from tflite_micro.tensorflow.lite.micro.examples.hello_world import evaluate
+
+PREFIX_PATH = resource_loader.get_path_to_datafile('')
+
+
+class HelloWorldQuantModelTest(test_util.TensorFlowTestCase):
+  model_path = os.path.join(PREFIX_PATH, 'hello_world.tflite')
+  input_shape = (1, 1)
+  output_shape = (1, 1)
+  # Create the TFLM interpreter
+  tflm_interpreter = tflm_runtime.Interpreter.from_file(model_path)
+
+  # Get the metadata like scales and zero_points from the interpreter
+  # input/output details.
+  def get_quantization_params(self, interpreter_io_details):
+    quantize_params = interpreter_io_details.get('quantization_parameters')
+    scale = quantize_params.get('scales')
+    zero_point = quantize_params.get('zero_points')
+    return scale, zero_point
+
+  def test_input(self):
+    input_details = self.tflm_interpreter.get_input_details(0)
+    input_scale, input_zero_point = self.get_quantization_params(input_details)
+
+    self.assertAllEqual(input_details['shape'], self.input_shape)
+    self.assertEqual(input_details['dtype'], np.float32)
+    self.assertEqual(len(input_scale), 0)
+    self.assertEqual(
+        input_details['quantization_parameters']['quantized_dimension'], 0)
+    self.assertEqual(input_scale.dtype, np.float32)
+    self.assertEqual(input_zero_point.dtype, np.int32)
+
+  def test_output(self):
+    output_details = self.tflm_interpreter.get_output_details(0)
+    output_scale, output_zero_point = self.get_quantization_params(
+        output_details)
+    self.assertAllEqual(output_details['shape'], self.output_shape)
+    self.assertEqual(output_details['dtype'], np.float32)
+    self.assertEqual(len(output_scale), 0)
+    self.assertEqual(
+        output_details['quantization_parameters']['quantized_dimension'], 0)
+    self.assertEqual(output_scale.dtype, np.float32)
+    self.assertEqual(output_zero_point.dtype, np.int32)
+
+  def test_interpreter_prediction(self):
+    x_value = np.float32(0.0)
+    # Calculate the corresponding sine values
+    y_true = np.sin(x_value).astype(np.float32)
+
+    input_shape = np.array(
+        self.tflm_interpreter.get_input_details(0).get('shape'))
+
+    y_pred = evaluate.invoke_tflm_interpreter(
+        input_shape,
+        self.tflm_interpreter,
+        x_value,
+        input_index=0,
+        output_index=0,
+    )
+
+    epsilon = 0.05
+    self.assertNear(
+        y_true,
+        y_pred,
+        epsilon,
+        'hello_world model prediction is not close enough to numpy.sin value',
+    )
+
+  def test_compare_with_tflite(self):
+    x_values = evaluate.generate_random_input()
+
+    tflm_y_predictions = evaluate.get_tflm_prediction(self.model_path,
+                                                      x_values)
+
+    tflite_y_predictions = evaluate.get_tflite_prediction(
+        self.model_path, x_values)
+
+    self.assertAllEqual(tflm_y_predictions, tflite_y_predictions)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/hello_world.tflite b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/hello_world.tflite
index a35637ab63f4fe043d5397790f2f05bfd8cf067f..7d67c3dfa6364523dc86d74c9e764ffba9c9f0ab 100644
GIT binary patch
literal 2864
[base85 binary delta omitted: hello_world.tflite regenerated, 2312 -> 2864 bytes]
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/hello_world_test.cc b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/hello_world_test.cc
index 963f7c468..7d7bf3f5b 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/hello_world_test.cc
+++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/hello_world_test.cc
@@ -42,7 +42,7 @@ TF_LITE_MICRO_TEST(LoadModelAndPerformInference) {
   // This pulls in all the operation implementations we need
   tflite::AllOpsResolver resolver;

-  constexpr int kTensorArenaSize = 2000;
+  constexpr int kTensorArenaSize = 2056;
   uint8_t tensor_arena[kTensorArenaSize];

   // Build an interpreter to run the model with
@@ -65,17 +65,10 @@ TF_LITE_MICRO_TEST(LoadModelAndPerformInference) {
   // other).
TF_LITE_MICRO_EXPECT_EQ(1, input->dims->data[0]); TF_LITE_MICRO_EXPECT_EQ(1, input->dims->data[1]); - // The input is an 8 bit integer value - TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt8, input->type); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteFloat32, input->type); - // Get the input quantization parameters - float input_scale = input->params.scale; - int input_zero_point = input->params.zero_point; - - // Quantize the input from floating-point to integer - int8_t x_quantized = x / input_scale + input_zero_point; // Place the quantized input in the model's input tensor - input->data.int8[0] = x_quantized; + input->data.f[0] = x; // Run the model and check that it succeeds TfLiteStatus invoke_status = interpreter.Invoke(); @@ -87,16 +80,10 @@ TF_LITE_MICRO_TEST(LoadModelAndPerformInference) { TF_LITE_MICRO_EXPECT_EQ(2, output->dims->size); TF_LITE_MICRO_EXPECT_EQ(1, output->dims->data[0]); TF_LITE_MICRO_EXPECT_EQ(1, output->dims->data[1]); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt8, output->type); - - // Get the output quantization parameters - float output_scale = output->params.scale; - int output_zero_point = output->params.zero_point; + TF_LITE_MICRO_EXPECT_EQ(kTfLiteFloat32, output->type); // Obtain the quantized output from model's output tensor - int8_t y_pred_quantized = output->data.int8[0]; - // Dequantize the output from integer to floating-point - float y_pred = (y_pred_quantized - output_zero_point) * output_scale; + float y_pred = output->data.f[0]; // Check if the output is within a small range of the expected output float epsilon = 0.05f; @@ -105,23 +92,23 @@ TF_LITE_MICRO_TEST(LoadModelAndPerformInference) { // Run inference on several more values and confirm the expected outputs x = 1.f; y_true = sin(x); - input->data.int8[0] = x / input_scale + input_zero_point; + input->data.f[0] = x; interpreter.Invoke(); - y_pred = (output->data.int8[0] - output_zero_point) * output_scale; + y_pred = output->data.f[0]; TF_LITE_MICRO_EXPECT_NEAR(y_true, y_pred, epsilon); x = 3.f; y_true = sin(x); - input->data.int8[0] = x / input_scale + input_zero_point; + input->data.f[0] = x; interpreter.Invoke(); - y_pred = (output->data.int8[0] - output_zero_point) * output_scale; + y_pred = output->data.f[0]; TF_LITE_MICRO_EXPECT_NEAR(y_true, y_pred, epsilon); x = 5.f; y_true = sin(x); - input->data.int8[0] = x / input_scale + input_zero_point; + input->data.f[0] = x; interpreter.Invoke(); - y_pred = (output->data.int8[0] - output_zero_point) * output_scale; + y_pred = output->data.f[0]; TF_LITE_MICRO_EXPECT_NEAR(y_true, y_pred, epsilon); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/images/hello_world_tflite.png b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/images/hello_world_tflite.png new file mode 100644 index 0000000000000000000000000000000000000000..e600a3a61c97883321d8c9bde6ea13dd6f8a8d33 GIT binary patch literal 25264 zcmZs@1yq$^^esvWA|)VQN{SK(q|-Pwf`o*0cXx*Z2Pr{1r9q?{4kg_w-QC^!_WAwq zyYJmGu45R;$NqM#z1Ey-&2@r5$w}ZmA%B8|goGm{2~|WwLSaWjLQZ*%4*q9ao;sXY;Rben>sq$ItZ|{TmA0` z*lg@g*ay!V&%hvBNcvP#(09tN$$N8dn$c_5okxSdwQDM7 z)?a%uWm#Q@+62D1E8>@ZEHrrZJyAWUHpXn4BhKEU)O+1u-#FXpG+He-At4J4`5TPPGtFZu0C5D6b2 zKXUogF)jG@POLiw27{pyF|dQL2$ZAY;48^b3jfz{_FM}3My)#vTrCCBy zEFqr9Ck<@r{>hZDl#8pU7Z6eSP4oD;6;JdjKK{}H1iW@t$k4>Zpn3+YfpIk|t|#U- z>N<%|wltBb1RZ#*nB8(4Ciwe!z7vbkzFTHrSs~`t=l^DcmXN`6HRP`aW{mktS{t!~ zyq9i>`TruYw*X(a#psdVUTvhUuC87RK4V^EtJ|dg+uF*0>4y0EZzT&p4=EplaF3cE 
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/images/hello_world_tflm.png b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/images/hello_world_tflm.png
new file mode 100644
index 0000000000000000000000000000000000000000..f0ec89bb004899e4685ec902964977e65e61a086
GIT binary patch
literal 25148
[base85-encoded PNG data (hello_world_tflm.png plot) omitted]

literal 0
HcmV?d00001
z>UVZoXtUP4M@lB-d=>FFD>N%kk>WqDgRVvBWy7FJ;lo*n%@!2{S$!<$Rj>fyqypp% z_4At5VB1Eq@QWii5X)>|1(vB%sFr+2hJ+`%YO;IA*Ec!_ji|Eb@%2N34@TE%clDdE z^2%uyRWUT0XOb_{aOT*}4LbbzKF9e_ z8ZS|}hroh8Pf2O7*2}}rPB>Q7l!!0l zDe32bYv98Ii?X{Im3koarupM;P8K2nG34pvKk_2uR!cmje&j?s?!u`35oe;>aiOd? zkUWkg&d8I7SJ$sXqO*xpo`%Og8*mRm4BJ+@1Vac>tndS`Lq;DBZZSqNn`tnv>~uK3 z3Thk8{18tkNPULSdidrM$7aE(Z<7hF-u}-0rLTl_e}|c_?;mbx<%aU=_|OV;r966J zSs-7vn5?C)=lv4D`}JFCIP|?T?M=RZ1OTA5(c-|wL?+-%;N4VJB?U|(Q7W`0LR)(X zbs^xaa)e7a#tNAQg@$%N;xzWMJGu% zDaMYNz`0iSzagPIGnJ7Y*6p3($$*(*cl;9olK7t-=2ehnE>79gsUBcpZG|IzE@-4w z1ceII$-sF?2izhheb-vreLs;!NR#M96K3Gye0O!u;-y{Y#Wz1t~XTYYkZ zEltt#2#GKo&xz}e%R5@iH|&lQU0MpZV~t)XB%X&4Snq!bsq}&r7$+CaPxS08Ks9~B zi$;xmSwgXXKHj{Vi-quwW+Di?wABB~0X&!9vJ zB1lQIktuh{ceiR^7JX`gi6?oL=aO#pIpdG%+dia*BHyELW78j&B8a;(D71G^MJ-@& z()YKMW7Eb%-yklQVB&;waJ;_f@82NvPAtQ3z>)*9NxBwe)I9uMRWe*) zQuy^;rc>5UlN||0vq)pOEZ|@JPah@LHH*=bs9qXUk6`;Ko ze;^f8`R3fn$S%*&9tAzUncSZ$?Wc6+8oTa`jh~fHa2WEhR)FsV4mVmi z)wZ9!NnrT`LDx;7KYtpvY1yqn@O+Yz@nG1pDz~EeeUV1BP`%;z!G(YQ;HNiYZ4w0D z7ClgP*!lCO^#$0(x2R}o119YDzR%sy(-Sg2nZfrrO7~gb;fXa$`L7>@2dj={lrhrr zpZMVEZYdzcF^GVM>#F|+O3H?h8UX$a`sI2B_fDb4zugYGtL)Dm;!0#oW2gEqij1~@ z|2Fd5|AsniDizjjo!eO%LReWPb`$Y=dHi017X*<4Ok`A4Htb(w386rynVMoF3_+BC ztgPzr>Hyr51Ra|Q;Fk7W^>EFA9()QNWq_+s zzL)SIyv@G%a>wKXml7rHeIUJ#h*1AXz4b1(8sc5AKY1}+_^21eeE`zt{ATFN49nIp z*C)6QcIDe$BE6r$Jt<~4t`tblT=6fXN z=AN1MOJYH&oGfh!;thkpi4+Uins@bb#CmjwPH%*)hoPYe~DSW+iRI(IxJAt6C% zq z7e$}k3}?AMQfv`kFL#a}py+H+tw0b{D?_;hK;p#oBrb1!>Pd)%JO8II^!_ zeep`gN5jqz1wqeQz;9>T&wV*2=Qg&PsCoz1NNO0X*Z0Po8vRKS!7MT?+#v4Shc2a; zc~Jw+p#){Q%N!iOz|Ii{LXOMN?+SpAMgVp7awKPgN6HMZ#|5Nd1VaqGw3tAae}aIX zd>~b_P32In@|qwA)Qu^Hbq6YobG>k2jDZnA8^(lgl-j6~19c{Vnl$n7p<)#i$oe7{ z=rb0G4sU(dylJ(f7`)!-M$`V<4i9h>l)pZNrm;7lKv@VH79#hHY^hxKKj2 z-$gYwHIJAkDhJv+_5TNh>2lUb&JpQfSbTE~<}>W0YcAFGO~hB=#V%jI8~~s&B(jPZ zv+Z~dp1F-rc#SZEC7hPd0}Uz_;CXL(bzVW)XA{VA8adiLKy^Q2HpOOZXHx-?hZroY z$ZTihaI3$9!UOAHq0=kovo|B{Hs7|^H)2Ng8S zhf*2dF|c?+ooyM0q%2g3Mys5GuL_i8;O-$jHV8Ci-P|hbJ?yy0lD=etmgzFgp!Gm--NM^teJ)N z$C4?Yy9@|Q3%Mu_ulBP9RviFm=V)iAxoe%!OS=gosA%Xt1m_RYKP3x`8Zhl(YTtDG z-G3V(+{WV-_D-wAp|G}Lx?&rEzl}`X%{_-KW1+_#7t+;|Bo%?`6$*4{hShI{d#*~j zNP+dtv`I3lsJc4U34P=FDfZaMBq`Zo-q=&!^zL@fSE4aSt+!L@I zu7pXg3j*0Z1d7g#rPklXPU(u!*vI62sr(O0+MbM86d^2Nh=*oZ^3K>B_}>sXILZoY@&&TSKK}s?AjeVw literal 0 HcmV?d00001 diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/main_functions.cc b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/main_functions.cc index 41350e8b9..ad97c3c5e 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/main_functions.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/main_functions.cc @@ -32,7 +32,7 @@ TfLiteTensor* input = nullptr; TfLiteTensor* output = nullptr; int inference_count = 0; -constexpr int kTensorArenaSize = 2000; +constexpr int kTensorArenaSize = 2056; alignas(16) uint8_t tensor_arena[kTensorArenaSize]; } // namespace @@ -85,10 +85,8 @@ void loop() { static_cast(kInferencesPerCycle); float x = position * kXrange; - // Quantize the input from floating-point to integer - int8_t x_quantized = x / input->params.scale + input->params.zero_point; - // Place the quantized input in the model's input tensor - input->data.int8[0] = x_quantized; + // Place the input in the model's input tensor + input->data.f[0] = x; // Run inference, and report any error TfLiteStatus invoke_status = interpreter->Invoke(); @@ -97,10 +95,8 @@ void loop() { return; } - // Obtain the quantized output from model's output tensor - int8_t 
-  int8_t y_quantized = output->data.int8[0];
-  // Dequantize the output from integer to floating-point
-  float y = (y_quantized - output->params.zero_point) * output->params.scale;
+  // Obtain the output from model's output tensor
+  float y = output->data.f[0];

   // Output the results. A custom HandleOutput function can be implemented
   // for each supported hardware target.
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/train/train_hello_world_model.ipynb b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/train/train_hello_world_model.ipynb
index 0aadd0bf0..3196383b8 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/train/train_hello_world_model.ipynb
+++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/train/train_hello_world_model.ipynb
@@ -3003,8 +3003,6 @@
     "converter.optimizations = [tf.lite.Optimize.DEFAULT]\n",
     "# Enforce integer only quantization\n",
     "converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]\n",
-    "converter.inference_input_type = tf.int8\n",
-    "converter.inference_output_type = tf.int8\n",
     "# Provide a representative dataset to ensure we quantize correctly.\n",
     "converter.representative_dataset = representative_dataset\n",
     "model_tflite = converter.convert()\n",
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/train/train_hello_world_model.py b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/train/train_hello_world_model.py
index 3388e1e3d..90d26d424 100755
--- a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/train/train_hello_world_model.py
+++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/train/train_hello_world_model.py
@@ -468,8 +468,6 @@ def representative_dataset():
   converter.optimizations = [tf.lite.Optimize.DEFAULT]
   # Enforce integer only quantization
   converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
-  converter.inference_input_type = tf.int8
-  converter.inference_output_type = tf.int8
   # Provide a representative dataset to ensure we quantize correctly.
   converter.representative_dataset = representative_dataset
   model_quant_tflite = converter.convert()
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/micro_speech/Makefile.inc b/third_party/tflite-micro/tensorflow/lite/micro/examples/micro_speech/Makefile.inc
index 573a4e57c..46d567669 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/examples/micro_speech/Makefile.inc
+++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/micro_speech/Makefile.inc
@@ -233,9 +233,13 @@ $(MICRO_FEATURES_GENERATOR_TEST_SRCS),$(MICRO_FEATURES_GENERATOR_TEST_HDRS),$(MI
 $(eval $(call microlite_test,micro_speech_test,\
 $(MICRO_SPEECH_TEST_SRCS),$(MICRO_SPEECH_TEST_HDRS),$(MICRO_SPEECH_GENERATOR_INPUTS)))

-# Test the code for feature generation.
-$(eval $(call microlite_test,simple_features_generator_test,\
-$(SIMPLE_FEATURES_GENERATOR_TEST_SRCS),$(SIMPLE_FEATURES_GENERATOR_TEST_HDRS),$(MICRO_SPEECH_GENERATOR_INPUTS)))
+# TODO(b/268568089): This test is taking very long time to finish; causing the
+# CI to run for a long time to finish.
+ifneq ($(TARGET_ARCH), hifimini)
+  # Test the code for feature generation.
+  $(eval $(call microlite_test,simple_features_generator_test,\
+  $(SIMPLE_FEATURES_GENERATOR_TEST_SRCS),$(SIMPLE_FEATURES_GENERATOR_TEST_HDRS),$(MICRO_SPEECH_GENERATOR_INPUTS)))
+endif

 # Tests the audio provider module.
 $(eval $(call microlite_test,audio_provider_test,\
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/BUILD b/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/BUILD
index 069fde48a..b7b870c97 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/BUILD
+++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/BUILD
@@ -30,7 +30,7 @@ py_test(
     srcs = ["evaluate_test.py"],
     data = [
         "trained_lstm.tflite",
-        "trained_lstm_quant.tflite",
+        "trained_lstm_int8.tflite",
         ":sample_images",
     ],
     main = "evaluate_test.py",
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/evaluate_test.py b/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/evaluate_test.py
index 406e7a868..efab0f6fe 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/evaluate_test.py
+++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/evaluate_test.py
@@ -91,7 +91,7 @@ def testModelAccuracy(self):

 class LSTMQuantModelTest(test_util.TensorFlowTestCase):

-  quant_model_path = os.path.join(PREFIX_PATH, "trained_lstm_quant.tflite")
+  quant_model_path = os.path.join(PREFIX_PATH, "trained_lstm_int8.tflite")
   input_shape = (1, 28, 28)
   output_shape = (1, 10)
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/trained_lstm_int8.tflite b/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/trained_lstm_int8.tflite
new file mode 100644
index 0000000000000000000000000000000000000000..636ea0bbec46d4f00a386332a38c82257f81805c
GIT binary patch
literal 13952
[base85-encoded binary data (int8-quantized MNIST LSTM model) omitted]

literal 0
HcmV?d00001
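Note: the mnist_lstm changes above only rename the quantized model to trained_lstm_int8.tflite; per the evaluate_test.py hunk, the test still expects input shape (1, 28, 28) and output shape (1, 10). A quick sanity check of the new file using the standard interpreter API (the path is assumed for illustration):

import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path="trained_lstm_int8.tflite")
interpreter.allocate_tensors()
inp = interpreter.get_input_details()[0]
out = interpreter.get_output_details()[0]
print(inp["shape"], inp["dtype"])  # expect [ 1 28 28], per evaluate_test.py
print(out["shape"], out["dtype"])  # expect [ 1 10]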
ztJ!d9U_Y`WDCU&!m54!Sc9L}7nVd*z!}WYp9t~%Vu+%d2fAW6XA?EqS618b(&!GoD zK74y$=RNmgRvl*V-B52?EJ`P*WD-4X1+nYk?p=F!|KUz=TQ+xf#14l?;5whF&PD49 zl-hUb!B5<(t*q8^{QX}X-p>s8F2|SHS~Z@-Hh=EZPwX2RE_^Hi$3AiQU3cs=>5?g~ zMs7gj7*<`)+l+0cvD;JaRYDRBoA2KCKr^jHd#tS-;j&v~ElFvuo;{=9Oqp+MeakyLzHtiynXWV|H%@NIAx}q#B{N}muDe0clK4P{OY2YjA zH!qC*$)P>{Ax}Qvgfgdvfj@HJ$xSC>=igYUT{`#nZ|ohTE_oh`%HnzcHd`wKLwBOH zGu12|I&%N+FzqP$2`_C%I@^$6&abaWb?s!jYv26QTmbEB+qdz+h^16aW~#9;wM&>P zAMmuldNNlQ8J=Z?VT(Supm(lt8$9h>$Ex?>3-%x-YI60ZLNgWa>=*q~JceflXmHl# zMauUc(`wv9q2<{#D4}m%DRZ*iy7K=LkC?OzCIdQD+-Y7!abh4K_!~`y!gj&Gvias5 zSCck_e?u^AN}y-t5{<{3kmWp7a&g^(d>7gIgvOfHvZ;95u!TEyksH!z2Why|-_I}$ zulsZ}WIfMo`B9UPJ@fPbI6?Q_`6*j#gm;ern2UU6_j$^?z}f zZn~k&AyY)c>9sn#^;=5Mz8RS?Zo7SSufe?~yV&Qny6OT9~|gmQe>e_^5*mNXAIS%W_H2C;d>hQC5qrsP@jeB+Ce-PDrB~rckKPd;X}g# zgSkoybc<;5$?Pdnw`=;{$=Q|U)tR%uiaC%{s~+dbqQ@I<(-wM6keh-hdR93YMhJyCl_OtW`%Rh@!0du*?n3kA~iXv&S6BY!t=KuGU!X3 zSav|sR=nPYQJN};;~ej_4fMEb>3B4@yj-vN>f(?@n_(YqKoOrt)SGP{+Gp^RW7wXK zqIvVD?(Nw*(56jHHAoMEHAJ^IVE)XZP`@3z7=glc6OT`(78acyu4+kw1vy=5apYXl zpxcZfQd;2nmT70GW+{h82fJxPFJ+V%-ceJkW<%55Xf~alHfS zxB?3}l#wo66qu?rxH0SwFqAP65DEs{;MSeJJNqtPy2RoY7;bQ-BxG^rI=AZs4zJNF zQ?S*z^3N~TF|4W6z>d0mX`zqKi>ok7^C#OsTXyot-%y?6bL@M!kh$dQQLe{gslzW!6&$ ztpd4Gt3l6*6k}lYMoO+IrryYVYp2yE&&Ge6)cbvXPHMcZ=n z<;i%NW*ij-)sKYCWWZzS#1`OoFXVExGsZk$5az|jtN9$C&I`G`CZ%lMZqZhAP{=`V z@#l;DV9(f2{{h9IbcSvTF1srFIRd*nqu08_+aB1xk%5h_Sk5XcuU9!^JI{qa;ikpa zcOzFJ+TYQ0%l5FYJsb*JxHt=U@R&g_4sYuTpoz_&-qr@Eny6K~GC_}Zkfhb`5Yx4( zc+unO>9I+#otfmS9(V7UoYfM{z`&@r$+Oyz-?GsWLMPGi&W%CrVD|{*fnyimIeq?e zD+PM(-YIiBqlYLEky?1e#xj~}_QQC)#{+3ty~D3b=AaHcM>izeh`BmDdi6>n&mom~ z#k~3HgPKY)TFBNjax%nc7GoK!j*TSsh)zQd?{rL^x~$Q`N<5Vmn%aqzm81#n-gAq$ z(6SiEyxR`{$sKZvEue+rIc0_gf86jfgZwEdwb&97idiIm}pEx#g9Jf;wdZ z=Ddp14H?@vdU_l>qfxmH?daeC(BosR>{eZ8A^ zIXJMEG(ww)P##`cUpxN@E^2qq#9dk0Sx!!|40g3s@M)Lr-0r1gn|?_h4DPV~x@dBT z4t;a4t|WFP&KM!iF+CgJHBXz++Kc7}j+L&)A_-Tki}(CmXk5;;R+k+Uw+NhQ40%e? 
zb9WkH%`2Nd9ih?fF}9{Ol4Hi!RfBKJ;$MCh{U}^SWb0`r!r^iB{{ibkG>)<)t9n*G~VHfq$J0xbJ#!K z@rl~$MV#BCA zZ1u(D+V185)lr(>Id`reztb*EzKWkE1OM4+$^2Rn((+y{-NWGTVA=iNWxeBz=So!l;*ZEp30Kdq z#n{45bEIMNSvQE*m)pC)ZVQYKEyzo|inOwF$oPxOrosKv%%43moN{*SS89ttJJ@VG zq#rKWwKb>F+EX_YdDj-!($5qVZ4}G=`eTAX>3ZbIppELNhqFdjzDi_XcSW%(#@|&4 zJex!=aJNr??4hP&Dtjh+o8P{5k~a*NZ+$MacjNL(KofmkUcd%7TuNmrEhgQ6w?24K zcx_|dlA#gDMP<3om5kq=;IYYXyjYlLzIVHmx%(ft56pYYB=b+xSC=B+Kk!av0*$r| z-fXBtm}Tb?{S^ySjOm-{+Jn(fr#p!5>G~n!FvU5chZ4oM8O^ZeS_D{Ll zbjBi{8(4id{@&aO{T=7_zA(t{C;U8f!1&3QvL*Qj|K?E_!7rGcC%*VDXUW zM06~E+Y4I_=Iu78>Fo15L~^`w@8#D<|7Ia&Gw6rNzBIHqKC~~pEfwc}%-!~mdGM4L z0}u1$>J?`7Oe?J!Zajat{-?MvJN6HkjFsTULno)-$SICGjE52>W7{wG?%hqj_=dip3bel4J~Pc8 zQjl6Vbs>2$Lxr6=?Mmo~egF3*%Y&@PWP1C@jbAzYAIFWUm};3uLP+Mj~?AQeEAr z)oy)r|Esz4KMd7f7J0??htJ>9dCJs@VRMMBJ^JN@FIY)zk7=Ea$e7_zQrHrNCLR@@ zDIre7d?MK+zV+DWel>YQQjGt@Qz~(Fza@9xTG%^s=2x#yPR(q+<=Ys#7>ulTP0Wvd z=GD?-Df5D4bIAu~+&0r=3vOzJPU$UNQrS9xsbA+9xV)k{LE8l6+kOwWRX?Sx1Wc{q zLqCjm3CUh1`syY9ii3Cm2ma?@^IgzwjkWjIXNYPG!k_P-H|_kI@R@IBwNgT6s~dbL zo5edy!_LOaE~bqsT9MR``}LOS)z0d`V1g76lE>kbx9{kU*Pq~ON-S=G9dLc-i=4K*#WA|{qApzbA*f9ay4jHKVg?)iHN`q;lf(7*jB z1ib}l?u!s~=C1(e*FpSEVBh#V2zv5wA*k!0Am~TH$Z_Di5JZ9Uzz-nkL0|y+%AZ1z z_$&nR{}+NHz!>uPH(~#EK~W9^1ITN@mU6fT>=78~2WV7Ol&@Do9?+kG^83Jr{>ieU zK)?pmFDMES0Xh%*e1-+K3oy>V26bEWijutv`ycm0(BE`I(7oFrs2%9X0WiM{3_Jla z?nPh}`PKjgJpuHj_#o&VV8r-2Fj&QVA!vgQf;x9X&`!|*KG6Rn(BCtl&w#f2o@M5GB(OXcumTVXiHg6-B^5H0?F&j(g z!@6m$E8~8vIXIDm>D)9lx`U%N2KbR=ulXj!{n8vHi}k!ZS*TExvraLX zbiE>1vNnqD!4zhlT9=2ELueN6jHjVGzAD_}GZG%eq+y7f z+*Z_tn@X*EOPe3IqBV;N;kgZUVp~(EKqH&BLLNiYr)XR9P`PawN3^7FXltWRX2($3 zS?VkIATEqY?Cv4O%%$#bT20ed(r&QXg;Yy)CGqs^jwWH4He))s;;5aiFjOvED_Eig zG7SwX=i6*9OI?AKYI&v=!Buv;WZY;# zJ@uNNEH+6`Q`Sc+Y!BYm*$gEUX3t#F<+fFgsi-`R5$#AGkwUCC}G zgVv!C7vFX=*(24gtX!Jm4tTW2J|AD+J3F(%STJ#pOj8=L$@;x6I6l0z5*9d0kmGQ} zVxBYaFljOW>O^O&MiEOT62&}ua2?ZA*6eJU&1wMEuu?w z89Mb&NFS1>pcGQ51>H8i)fq71cn8&qF4R_}eACCZ6BI)yc*1A2hf~}_2I2Z$ojQOt zt?>j5MEg=}q256I^n*6N!;`D)fziu2UtUb}#t!>{v(0J9rb=2SL)EHTA!+0Aq-f*W zNUl!U;#4Lri&+~c+6{*xBUlTY32lGTjSjr(!O;`bdegQkD7(Bpzud*tpZ&OhtSX%@zSGvjkqH=CZg)+k~zdE@kXS6E#_TF$arS%(6`SpSI<{9flzCuuDJ4^pm8298cn~?r zw3QG|Em^HxaixG%6e?+Yz1?`hlw?!nLi>EfY@lRA#jEk_Io-0%n_8U{vv!ga8O7fi zHciv@W`!w+N)=kvVb(0eZf>r?HNNVDLRnld*vecU!@`zJ<+=`77s7&|nA$G1PEs|d ze>FjpJFRxV!YnxlU&DSwnV6BYGnCZTK4356GhJOT2T!3M>Ap9vx(J8+p3dCT#EvaA z*VKr(+@!EN&TDsx4xOn| zVyBX~4!AI#Mao(7`j`)a8j`hOl9d81^KvWDB1?QLPigcCE0nG)Mu}#Oij*@{V+v+& zn2ogxBI7z818e4`<{aBJXG~6y20~_oS4pL*hTWX3*r^3O zrbV@hoP`$ZHpM{K8hCZZqw^N@CAMX#=BzMbPt+J5SZFsS7&bG(!@;*^99rJsikB{x1;t~a$Wf9E&URE; z&f7FbYLUbcZM1}?0y3+M6S!eA>Md7 zQ9|l6Ew0B`6bgh0kWDi@TWJp=c(aY{tarpLO3mV%6yW@z3o~i_geO7G=*6PjHj^+DV+N1D z!-iK;BtO)*N-W5X?5-KB}tn;C9Gqwzp1jMUkI z`nMzbQ^zf2HAUYf#-aCK+x+$spiF zH7Bjs9HX_-I$ejx9Ui<;P1xq=x`(*9=ndnoTVjsj=oN9HesOW{ZtrT$XP2}iEzH)Q zHs@34liS)m)93s>La@WgI7~AXQz>XSx4BAhlSXXBY-hWg3xW&fEgqY9=~B~y+@Yms zJ9Bk#Evt38in{1Tmc+N)q*uEx)%8l23UtR?xyTB^ddbw8k!eJqt>`<3lX-G-m2LD{ zCNB1*5KR*AGz}ItWD=4!?c7@n!FbKJ5EyQ6m&zeA-ekIBuh(m-xReTQ@s47|c5JTP zbS5`CB==ywbLXIL@&q_%nIiQx()N3nOd+V)LUQ;iYnYnI9 zg>Jc_rOH;SwNNNXKFo9x_ms7npkbNXOmXbwMRY`O&`8!~JJW4T61{rZT$$lV5zJR{ zRR(fKTr>~b@b=PNsjDU1XD)F64<&4ZxoUN%NLzzv>keK^ zX%SIia}^Vj!JSQmDCu~uKr<4hEG=nWvlh%CXsR&HqPjK>i8rh%LDuCE!ONoo7RpmW zjl~q~l=Q(G+fg$0YV@KeJr*~U?Uts{R;J`KW`Piy5)GQN*kRD~IEABrjQ}xKE~<`ukcu7t;?66aeMlPg3d3QhJhqm8#0xp&!_qRz58sIOf`xpO*mehjXi z;%b7_H*7J%SA_bC;;Q#Mgfj#sR~=iJLHV*!&%kb3=cr|9*?<{)9POndSr&^HqRYsL zow}N_Wz)J)*$T1QX8TANHv5?y_ z8>dX?R6%4d?{U7u~WfPMd_xr&H+O$eF0CFS~<7alZOc!3E^(3 
zBF#7IY`tv6Hd5&ox{A5ljHYFx=w7H>ot98{vS3;Y*d2CPyuM<~$8C0#VV#?h85)pSgPsjzve!P~mJ<)8=)Rje1B#q)aal~NJ! z@M`^OJyc8&igr&$?<`gpDLYR4HBK_Yc@eXvR_m_i(*$K;e3D0sk-N&+h7255=3^Dg z+HjgW(?xnWDp9MLmpD@>L;4!6>0HQY_ZBP7Nr)ECDK;I+H6bifX?5nSk!(v|&KEbp z)+RJB3#k>&ay8v3)#6D!Bq1!Y=CiH2>O8U9D2aGcuoqc=KFO7|v$>Qwv#99#YDKQ; z*u<1A&rYQ$ic9kz($IveGYxY3swP@nicMB$QXV5#uOrhLEH+1E;EOqmiRk5AjZ0~5 z9T`6pYt$ko7q!CFE4)-Q6ON?ClTZ?wvfew5ms4eKk zD;yK1X~F_Ne3FzS37Hzuh8%~5T(qz>53Q!aXUD-# z2eU8PVQV(*nmI|cz(H&^>~}bK6g zKESk9b<5=iU>rZ&y8xy0vx?E5vzKb~RMi?uo*11rr?trnt*d&P*=k$X(x|NVRdz?sV zyEhW>&cuuf~5s^NxD?G#0o~m=aREc3r7gJ$(m}V980{NbU8Q} zg@o-2U&l)GnN6~+bt~u;VdPo?-8=#%F3fLHY&E}>J!{CwH7$lnjd%eTYCMO-SgM)U z2((}YUN|L`&lDJps^%N3FvPIBLXl((nN}V45i$-(2oKz9$Wf@4l+dct$#E@h9kq2R zXY>L_&^0MzuuXFLMoz$knm1}eQUk301xR;RlF$+<2CJ8N2c2voWK)mko6SnOPPA3e z68(JEgBi;(3on4nUb!Shvsyo2HOaIB40$WoVq~GFR4A97hh42EA&T}DOQb#vIjT-M z2)CqK4!n+zaL8PWZS|(frpH`?qCAqW^p@F4rB0G%sMcwXmaeX3Vlh_^T2YErEVUF1 zSr*XCOR+>VHRp`i=3B}9LJgx=>`R$i(fUqB%EnI#&HO5YYOeG!jT&mZXwY%R3GgP+ zqMBmN9MahZ!%2e?WoF7KE1ET212v6M+Ga_=K`}(~I-W5UtJrdNqk)QLUG9=xoVOYz zT`Z|vQ2b3x8X+9ZnabOK8WFNe>CJSzV5t|Y#5>3CH(&VL-E{WE%moIOT z9E&1Nyp$`hB2mjyB|USYIH>Q8wXo(Jk#rKuON(Zuq|FFc9qmIk1*B9ZQItV>!Pqcr zIU`<^GrD?RG$3mR-V+zs8G)Gh*ltNeIQV->3A9;1!vZ2NV7OaH+q5%G1rGcRa3WOHi3Z( z;EUBlttHa0&%k1xBO-LeNo!WL*{n$;ymlJqTd1jM)Q{*CPK?)0PHp^xK=V{R-Yst< zi`mjjgR$Gv7j*i(xuyh7?eMCdub6@5il2A;C0jOOAT{FDMqunrbvB8H$?braYRztA z9U4eVRKt(r{-#$+UrCcPJ-I(9zX}TIBa*Yv5H}o1dbvGOHXG|?i&Gqg}~&z z=7*OCcJ)&fuw4Tmzap?bQ^2Nf2ext+*q%R6LeReg`}4CcMS1vIA5a8=q@wztkage_ z2}t#Yyz9=}?+M)U#N)u%@c8)A0SE?V>%Mc$4UbFJrw_Om-tf8n7HISxKtBMa%JbNf zM;`~#T6zk^UjwAt|JAxvfM*+sL7>z)0BFr!@FqV(2>Q})00pVOmud{m;UK)nUk8w` z^9L0qFq5yz0)_%jfpJ{#qvEQ5mujrmeb6US^=VZ5s_j)$+p6tU|4TIwG`Lo#`k5Ml zC*yiRLFqcbq4k2>HiCEyAn-{50w#P&_3>1722Aj4ZB^e+Rpxbsz+O;{s`x4&m8Tk1 z-K%2)p9|J}EmeO~VD^W=l&q=qo_6sxN67 zDD4G6#{rRm{uD4&2l(WAGiU^?L68J*KfvD!kg7irsQuul4b-W6QDs-<{L(d{)ilU$ zSLgoT<42Bp{)aQ^UJV1zM zd3x{Bqwkle7wky@d1KrvZxE70T6$P*~7<<96vt(_~1R` zPd{?x$#Kx+#`x8J{LK&WQ`aE~Yy7_QU*LEA;bQ~OJpSm>N1qyhNLBNZV~5uj@zD6; zV^1G{Oa)W*rpl=9-SzRPd&Ii7=2dUs^*~&oV_2QnzV*U2IcV@4`d#Dh0By$~epnUY z3*(0$dGzSf@y8E8{m_wPoBDl-=fh7t^~dAKKhX0%p6cpf z_gY`y*Qy*4^q#)daj3GWd-R4Z>xqvX|Fdhdz-xL|_X7yT9`JLVU)S^X^{!T259{Oj z(pL3ZrOx@2Pme!y^ohftf9Q$FpFTeP-kSQT`Bk4i>eFX^53lpRKEFFaP5v6+>-%}F zIrvd|s_R;PK7EKMD7(IoRi2*(=akx9O{-&4u|S|y{5wINs-us>@84nBp_U(c>e1tm z46Kdj$m7R{!HCxJRXlZlocI8~`s@W^4gV*<1AqTls@p#Te@N}D%6}cd9h|=)yC(le zkWioNAgtljpiY%vZF_w^sg-I_^SAt|YlkY&`^T{LefWR*0etn$0AUUP*zdt#X7_vq z{+9RQuYLfZz6t;Q@4>GGogaZedIMh__df8A8$>tsqn>pjtjXU8>VB7g&i>^a+dr(I zk@w+$`vdsu*>D~ItG@?dr~URv;19nKUn9L=e)WvLj-UTM_{Y!TJJj{?{`q}x{iyo+ z>`nOU;!^bif=0b#+zIM_+xnL;S+;ywH=Ebim5TeT58$rv<#pWe{0`hV{(EinhjGVl z;F{(4^r6ZNt_*5elNVTyf2+I?nB5;C@1`5LpZfqVxPIKgy${s=Ha&dfmAxNbg_UW%Zt}!l{@lziW37 z&3^;wHcfQ0$cwID*S6#!`Wl;C*^?3)c49c}Tsa=(I{du%5>$S73@BahkOZMpi literal 0 HcmV?d00001 diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/person_detection/Makefile.inc b/third_party/tflite-micro/tensorflow/lite/micro/examples/person_detection/Makefile.inc index 0cbca9e84..0be91588a 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/examples/person_detection/Makefile.inc +++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/person_detection/Makefile.inc @@ -54,9 +54,13 @@ $(GENERATED_SRCS_DIR)$(TENSORFLOW_ROOT)tensorflow/lite/micro/models/person_detec #Find any platform - specific rules for this example. include $(wildcard $(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/person_detection/*/Makefile.inc) -# Tests loading and running a vision model. 
-$(eval $(call microlite_test,person_detection_test,\
-$(person_detection_TEST_SRCS),$(person_detection_TEST_HDRS),$(person_detection_GENERATOR_INPUTS)))
+# TODO(b/268568089): This test is taking very long time to finish; causing the
+# CI to run for a long time to finish.
+ifneq ($(TARGET_ARCH), hifimini)
+  # Tests loading and running a vision model.
+  $(eval $(call microlite_test,person_detection_test,\
+  $(person_detection_TEST_SRCS),$(person_detection_TEST_HDRS),$(person_detection_GENERATOR_INPUTS)))
+endif

 # Tests the image provider module.
 $(eval $(call microlite_test,image_provider_test,\
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/activations_common.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/activations_common.cc
index 4403edc87..2ec3a1bf5 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/activations_common.cc
+++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/activations_common.cc
@@ -55,8 +55,8 @@ void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
                          ReluOpData* data) {
   float act_min = 0.0;
   float act_max = std::numeric_limits<float>::infinity();
-  double real_multiplier = static_cast<double>(input->params.scale) /
-                           static_cast<double>(output->params.scale);
+  double real_multiplier =
+      static_cast<double>(input->params.scale / output->params.scale);

   const RuntimeShape input_shape = GetTensorShape(input);
   const RuntimeShape output_shape = GetTensorShape(output);
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/ceil.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/ceil.cc
index a390a7355..dbcd57c2e 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/ceil.cc
+++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/ceil.cc
@@ -1,4 +1,4 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -21,9 +21,8 @@ limitations under the License.
 #include "tensorflow/lite/micro/kernels/kernel_util.h"

 namespace tflite {
-namespace ops {
-namespace micro {
-namespace ceil {
+
+namespace {

 constexpr int kInputTensor = 0;
 constexpr int kOutputTensor = 0;
@@ -64,12 +63,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   return kTfLiteOk;
 }

-}  // namespace ceil
+
+}  // namespace

 TfLiteRegistration Register_CEIL() {
-  return tflite::micro::RegisterOp(nullptr, ceil::Prepare, ceil::Eval);
+  return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
 }

-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/comparisons.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/comparisons.cc
index 597856cd2..31ab92591 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/comparisons.cc
+++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/comparisons.cc
@@ -1,4 +1,4 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -22,9 +22,7 @@ limitations under the License.
#include "tensorflow/lite/micro/micro_log.h" namespace tflite { -namespace ops { -namespace micro { -namespace comparisons { + namespace { struct OpData { @@ -530,8 +528,6 @@ TfLiteStatus LessEqualEval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -} // namespace - void* Init(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); return context->AllocatePersistentBuffer(context, sizeof(OpData)); @@ -581,38 +577,30 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -} // namespace comparisons +} // namespace TfLiteRegistration Register_EQUAL() { - return tflite::micro::RegisterOp(comparisons::Init, comparisons::Prepare, - comparisons::EqualEval); + return tflite::micro::RegisterOp(Init, Prepare, EqualEval); } TfLiteRegistration Register_NOT_EQUAL() { - return tflite::micro::RegisterOp(comparisons::Init, comparisons::Prepare, - comparisons::NotEqualEval); + return tflite::micro::RegisterOp(Init, Prepare, NotEqualEval); } TfLiteRegistration Register_GREATER() { - return tflite::micro::RegisterOp(comparisons::Init, comparisons::Prepare, - comparisons::GreaterEval); + return tflite::micro::RegisterOp(Init, Prepare, GreaterEval); } TfLiteRegistration Register_GREATER_EQUAL() { - return tflite::micro::RegisterOp(comparisons::Init, comparisons::Prepare, - comparisons::GreaterEqualEval); + return tflite::micro::RegisterOp(Init, Prepare, GreaterEqualEval); } TfLiteRegistration Register_LESS() { - return tflite::micro::RegisterOp(comparisons::Init, comparisons::Prepare, - comparisons::LessEval); + return tflite::micro::RegisterOp(Init, Prepare, LessEval); } TfLiteRegistration Register_LESS_EQUAL() { - return tflite::micro::RegisterOp(comparisons::Init, comparisons::Prepare, - comparisons::LessEqualEval); + return tflite::micro::RegisterOp(Init, Prepare, LessEqualEval); } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/concatenation.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/concatenation.cc index a3f1cc346..59157564e 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/concatenation.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/concatenation.cc @@ -1,4 +1,4 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -26,9 +26,8 @@ limitations under the License. 
#include "tensorflow/lite/micro/micro_log.h" namespace tflite { -namespace ops { -namespace micro { -namespace concatenation { + +namespace { constexpr int kMaxInputNum = 10; // Maximum number of input tensors constexpr int kOutputTensor = 0; @@ -251,13 +250,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -} // namespace concatenation +} // namespace TfLiteRegistration Register_CONCATENATION() { - return tflite::micro::RegisterOp(concatenation::Init, concatenation::Prepare, - concatenation::Eval); + return tflite::micro::RegisterOp(Init, Prepare, Eval); } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv.cc index 139eda7f5..163364372 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv.cc @@ -17,6 +17,7 @@ limitations under the License. #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" #include "tensorflow/lite/kernels/internal/reference/conv.h" #include "tensorflow/lite/kernels/internal/reference/integer_ops/conv.h" #include "tensorflow/lite/kernels/kernel_util.h" @@ -113,14 +114,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { case kTfLiteInt4: { int8_t* unpacked_filter_data = static_cast( context->GetScratchBuffer(context, data.filter_buffer_index)); - reference_integer_ops::ConvPerChannelWithPackedInt4Weights( + tflite::tensor_utils::UnpackDenseInt4IntoInt8( + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(filter).FlatSize(), + unpacked_filter_data); + reference_integer_ops::ConvPerChannel( ConvParamsQuantized(params, data), data.per_channel_output_multiplier, data.per_channel_output_shift, tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(filter), - tflite::micro::GetTensorData(filter), - unpacked_filter_data, tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorShape(filter), unpacked_filter_data, + tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output)); diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv_test.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv_test.cc index c5519b544..2eec76aec 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv_test.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv_test.cc @@ -61,6 +61,9 @@ static TfLiteConvParams common_conv_params = { TF_LITE_MICRO_TESTS_BEGIN +#if !defined(VISION_P6) // TODO(b/268384678): xtensa vision p6 kernels break + // this test, will if def till properly investigated. + TF_LITE_MICRO_TEST(SimpleTestQuantized4bitPerChannel) { const int output_dims_count = 12; int8_t output_data[output_dims_count]; @@ -90,6 +93,9 @@ TF_LITE_MICRO_TEST(SimpleTestQuantized4bitPerChannel) { &tflite::testing::common_conv_params, tflite::Register_CONV_2D(), output_data, kTfLiteInt4)); } + +#endif // !defined(VISION_P6) + #if !defined(XTENSA) // TODO(b/170321206): xtensa kernels are less general than // reference kernels and we ifdef out test cases that are // currently known to fail. 
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/depthwise_conv.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/depthwise_conv.cc
index e872d4ac6..1bdb77cb9 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/depthwise_conv.cc
+++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/depthwise_conv.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include "tensorflow/lite/c/builtin_op_data.h"
 #include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h"
 #include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
 #include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
@@ -66,31 +67,34 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
     }
     case kTfLiteInt8: {
       switch (filter->type) {
-        case kTfLiteInt8: {
+        case kTfLiteInt4: {
+          int8_t* unpacked_filter_data = static_cast<int8_t*>(
+              context->GetScratchBuffer(context, data.filter_buffer_index));
+          tflite::tensor_utils::UnpackDenseInt4IntoInt8(
+              tflite::micro::GetTensorData<int8_t>(filter),
+              tflite::micro::GetTensorShape(filter).FlatSize(),
+              unpacked_filter_data);
           reference_integer_ops::DepthwiseConvPerChannel(
               DepthwiseConvParamsQuantized(params, data),
               data.per_channel_output_multiplier, data.per_channel_output_shift,
               tflite::micro::GetTensorShape(input),
               tflite::micro::GetTensorData<int8_t>(input),
-              tflite::micro::GetTensorShape(filter),
-              tflite::micro::GetTensorData<int8_t>(filter),
+              tflite::micro::GetTensorShape(filter), unpacked_filter_data,
               tflite::micro::GetTensorShape(bias),
               tflite::micro::GetOptionalTensorData<int32_t>(bias),
               tflite::micro::GetTensorShape(output),
               tflite::micro::GetTensorData<int8_t>(output));
           break;
         }
-        case kTfLiteInt4: {
-          int8_t* unpacked_filter_data = static_cast<int8_t*>(
-              context->GetScratchBuffer(context, data.filter_buffer_index));
-          reference_integer_ops::DepthwiseConvPerChannelWithPackedInt4Weights(
+        case kTfLiteInt8: {
+          reference_integer_ops::DepthwiseConvPerChannel(
               DepthwiseConvParamsQuantized(params, data),
               data.per_channel_output_multiplier, data.per_channel_output_shift,
               tflite::micro::GetTensorShape(input),
               tflite::micro::GetTensorData<int8_t>(input),
               tflite::micro::GetTensorShape(filter),
               tflite::micro::GetTensorData<int8_t>(filter),
-              unpacked_filter_data, tflite::micro::GetTensorShape(bias),
+              tflite::micro::GetTensorShape(bias),
               tflite::micro::GetOptionalTensorData<int32_t>(bias),
               tflite::micro::GetTensorShape(output),
               tflite::micro::GetTensorData<int8_t>(output));
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/depthwise_conv_test.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/depthwise_conv_test.cc
index e39f33d5e..bc958e0e3 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/depthwise_conv_test.cc
+++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/depthwise_conv_test.cc
@@ -1,3 +1,4 @@
+
 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
@@ -24,15 +25,23 @@ namespace tflite {
 namespace testing {
 namespace {
 
-#if !defined(XTENSA)  // Needed to avoid build errors from unused variables.
-constexpr int kMaxFilterChannels = 64;
-constexpr int kMaxBiasChannels = 64;
-#endif  // !defined(XTENSA)
-
 // Index of the output tensor in context->tensors, specific to
 // DepthwiseConv.
 constexpr int kOutputTensorIndex = 3;
 
+// TODO(b/268384678): xtensa vision p6 kernels break the int4 test
+// due to recently added optimized int4 kernel support on xtensa.
+// The corresponding test is disabled while the investigation is being
+// done. Variables used only in that test have to be ifdef'd out to
+// avoid unused-variable errors for vision p6.
+
+#if !defined(VISION_P6)
+
+constexpr int kMaxFilterChannels = 64;
+constexpr int kMaxBiasChannels = 64;
+
+#endif  // !defined(VISION_P6)
+
 // Creates a DepthwiseConv operator, calls it with the provided input tensors
 // and some default parameters, and compares the output with
 // expected_output_data.
@@ -79,33 +88,11 @@ TfLiteStatus ValidateDepthwiseConvGoldens(
   return kTfLiteOk;
 }
 
-#if !defined(XTENSA)  // Needed to avoid build errors from unsused functions.
-void TestDepthwiseConvFloat(int* input_dims_data, const float* input_data,
-                            int* filter_dims_data, const float* filter_data,
-                            int* bias_dims_data, const float* bias_data,
-                            const float* expected_output_data,
-                            int* output_dims_data,
-                            TfLiteDepthwiseConvParams* conv_params,
-                            float* output_data) {
-  TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data);
-  TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data);
-  TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data);
-  TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data);
-  const int output_dims_count = ElementCount(*output_dims);
-
-  constexpr int inputs_size = 3;
-  constexpr int outputs_size = 1;
-  constexpr int tensors_size = inputs_size + outputs_size;
-  TfLiteTensor tensors[tensors_size] = {
-      CreateTensor(input_data, input_dims),
-      CreateTensor(filter_data, filter_dims),
-      CreateTensor(bias_data, bias_dims),
-      CreateTensor(output_data, output_dims),
-  };
-
-  ValidateDepthwiseConvGoldens(expected_output_data, output_dims_count,
-                               conv_params, 1e-5, tensors_size, tensors);
-}
+// TODO(b/268384678): xtensa vision p6 kernels break the int4 test
+// due to recently added optimized int4 kernel support on xtensa.
+// The corresponding test is disabled while this is investigated in
+// order for the vision p6 nightly build to be green.
+#if !defined(VISION_P6)
 
 void TestDepthwiseConvQuantizedPerChannel(
     int* input_dims_data, const float* input_data, int8_t* input_quantized,
@@ -175,6 +162,39 @@ void TestDepthwiseConvQuantizedPerChannel(
                                    output_dims_count, conv_params, 1.0,
                                    tensors_size, tensors));
 }
 
+#endif  // !defined(VISION_P6)
+
+// Xtensa kernels do not support float activations, and the corresponding
+// tests are disabled. As a result, helper functions that are only needed for
+// float kernel tests also need to be ifdef'd out to avoid build errors due to
+// unused functions.
+#if !defined(XTENSA) +void TestDepthwiseConvFloat(int* input_dims_data, const float* input_data, + int* filter_dims_data, const float* filter_data, + int* bias_dims_data, const float* bias_data, + const float* expected_output_data, + int* output_dims_data, + TfLiteDepthwiseConvParams* conv_params, + float* output_data) { + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data); + TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); + const int output_dims_count = ElementCount(*output_dims); + + constexpr int inputs_size = 3; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + CreateTensor(input_data, input_dims), + CreateTensor(filter_data, filter_dims), + CreateTensor(bias_data, bias_dims), + CreateTensor(output_data, output_dims), + }; + + ValidateDepthwiseConvGoldens(expected_output_data, output_dims_count, + conv_params, 1e-5, tensors_size, tensors); +} #endif // !defined(XTENSA) @@ -460,54 +480,6 @@ TF_LITE_MICRO_TEST(TestQuantizedPerChannelCompareWithFloat) { golden, output_dims, &conv_params, output_float); } -// Quantizing int8-ranged filter values down to int4 doesn't always yield the -// accuracy sufficient to meet the golden values. So this test was created by -// handcrafting filter values within the int4 range, and the golden data was -// obtained by running TestDepthwiseConvQuantizedPerChannel() with int8 -// quantization, and ensuring that int4 quantization yields the same outputs. -TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannelInt4Filter) { - const int input_elements = 12; - int input_shape[] = {4, 1, 3, 2, 2}; - const float input_values[] = {1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}; - const int filter_elements = 16; - int filter_shape[] = {4, 1, 2, 2, 4}; - const float filter_values[] = {1, 2, 3, 4, -5, 7, -6, 7, - 5, 6, 7, 4, 2, -5, 4, 0}; - const int bias_elements = 4; - int bias_shape[] = {4, 1, 1, 1, 4}; - const int output_elements = 8; - const float bias_values[] = {1, 2, 3, 4}; - const float golden[] = { - 0, 26, 29, 84, 6, 46, 45, 114, - }; - int output_shape[] = {4, 1, 2, 1, 4}; - const int output_dims_count = 8; - int8_t output_data[output_dims_count]; - - const float input_scale = 0.5; - const float output_scale = 1.0f; - const int input_zero_point = 0; - const int output_zero_point = 0; - - int8_t input_quantized[input_elements]; - int8_t filter_quantized[filter_elements]; - int32_t bias_quantized[bias_elements]; - int8_t golden_quantized[output_elements]; - - TfLiteDepthwiseConvParams conv_params; - conv_params.activation = kTfLiteActNone; - conv_params.dilation_width_factor = 1; - conv_params.dilation_height_factor = 1; - conv_params.stride_height = 1; - conv_params.stride_width = 1; - - tflite::testing::TestDepthwiseConvQuantizedPerChannel( - input_shape, input_values, input_quantized, input_scale, input_zero_point, - filter_shape, filter_values, filter_quantized, bias_shape, bias_values, - bias_quantized, output_shape, golden, golden_quantized, output_data, - output_scale, output_zero_point, &conv_params, kTfLiteInt4); -} - TF_LITE_MICRO_TEST(PerChannelBroadcastQuantizationParams) { const float input_scale = 1.0f; const float filter_scale = 1.0f; @@ -983,4 +955,58 @@ TF_LITE_MICRO_TEST(Int8Input32x1Filter32x1ShouldMatchGolden) { kQuantizationTolerance, kTensorsSize, tensors)); } +#if !defined(VISION_P6) +// 
TODO(b/268384678): xtensa vision p6 kernels break
+// this test; it is ifdef'd out until properly investigated.
+
+// Quantizing int8-ranged filter values down to int4 doesn't always yield the
+// accuracy sufficient to meet the golden values. So this test was created by
+// handcrafting filter values within the int4 range, and the golden data was
+// obtained by running TestDepthwiseConvQuantizedPerChannel() with int8
+// quantization, and ensuring that int4 quantization yields the same outputs.
+TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannelInt4Filter) {
+  const int input_elements = 12;
+  int input_shape[] = {4, 1, 3, 2, 2};
+  const float input_values[] = {1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12};
+  const int filter_elements = 16;
+  int filter_shape[] = {4, 1, 2, 2, 4};
+  const float filter_values[] = {1, 2, 3, 4, -5, 7, -6, 7,
+                                 5, 6, 7, 4, 2, -5, 4, 0};
+  const int bias_elements = 4;
+  int bias_shape[] = {4, 1, 1, 1, 4};
+  const int output_elements = 8;
+  const float bias_values[] = {1, 2, 3, 4};
+  const float golden[] = {
+      0, 26, 29, 84, 6, 46, 45, 114,
+  };
+  int output_shape[] = {4, 1, 2, 1, 4};
+  const int output_dims_count = 8;
+  int8_t output_data[output_dims_count];
+
+  const float input_scale = 0.5;
+  const float output_scale = 1.0f;
+  const int input_zero_point = 0;
+  const int output_zero_point = 0;
+
+  int8_t input_quantized[input_elements];
+  int8_t filter_quantized[filter_elements];
+  int32_t bias_quantized[bias_elements];
+  int8_t golden_quantized[output_elements];
+
+  TfLiteDepthwiseConvParams conv_params;
+  conv_params.activation = kTfLiteActNone;
+  conv_params.dilation_width_factor = 1;
+  conv_params.dilation_height_factor = 1;
+  conv_params.stride_height = 1;
+  conv_params.stride_width = 1;
+
+  tflite::testing::TestDepthwiseConvQuantizedPerChannel(
+      input_shape, input_values, input_quantized, input_scale, input_zero_point,
+      filter_shape, filter_values, filter_quantized, bias_shape, bias_values,
+      bias_quantized, output_shape, golden, golden_quantized, output_data,
+      output_scale, output_zero_point, &conv_params, kTfLiteInt4);
+}
+
+#endif  // !defined(VISION_P6)
+
 TF_LITE_MICRO_TESTS_END
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/floor.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/floor.cc
index 6b2a4cc25..207b5c4b4 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/floor.cc
+++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/floor.cc
@@ -1,4 +1,4 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -20,9 +20,8 @@ limitations under the License.
#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { -namespace ops { -namespace micro { -namespace floor { + +namespace { constexpr int kInputTensor = 0; constexpr int kOutputTensor = 0; @@ -39,12 +38,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { tflite::micro::GetTensorData(output)); return kTfLiteOk; } -} // namespace floor + +} // namespace TfLiteRegistration Register_FLOOR() { - return tflite::micro::RegisterOp(nullptr, nullptr, floor::Eval); + return tflite::micro::RegisterOp(nullptr, nullptr, Eval); } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected.cc index a148ce448..439bc4c29 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected.cc @@ -17,6 +17,7 @@ limitations under the License. #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" #include "tensorflow/lite/kernels/internal/reference/fully_connected.h" #include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h" #include "tensorflow/lite/micro/kernels/kernel_util.h" @@ -113,29 +114,32 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { case kTfLiteInt8: { switch (filter->type) { - case kTfLiteInt8: { + case kTfLiteInt4: { + int8_t* unpacked_filter_data = static_cast( + context->GetScratchBuffer(context, data.filter_buffer_index)); + tflite::tensor_utils::UnpackDenseInt4IntoInt8( + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(filter).FlatSize(), + unpacked_filter_data); tflite::reference_integer_ops::FullyConnected( FullyConnectedParamsQuantized(data), tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(filter), - tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(filter), unpacked_filter_data, tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output)); break; } - case kTfLiteInt4: { - int8_t* unpacked_filter_data = static_cast( - context->GetScratchBuffer(context, data.filter_buffer_index)); - tflite::reference_integer_ops::FullyConnectedWithPackedInt4Weights( + case kTfLiteInt8: { + tflite::reference_integer_ops::FullyConnected( FullyConnectedParamsQuantized(data), tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), tflite::micro::GetTensorData(filter), - unpacked_filter_data, tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output)); diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected.h index 2083c3fbb..7dc1ebb2e 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected.h @@ -73,7 +73,7 @@ TfLiteStatus CalculateOpDataFullyConnected( // (reference or optimized) must define this function. 
 TfLiteRegistration Register_FULLY_CONNECTED();
 
-#if defined(CMSIS_NN) || defined(HEXAGON)
+#if defined(CMSIS_NN) || defined(HEXAGON) || defined(XTENSA)
 // Returns a TfLiteRegistration struct for kernel variant that only supports
 // int8.
 TfLiteRegistration Register_FULLY_CONNECTED_INT8();
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected_common.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected_common.cc
index e7d0056c3..5a8d312de 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected_common.cc
+++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected_common.cc
@@ -64,12 +64,13 @@ TfLiteStatus CalculateOpDataFullyConnected(
     QuantizeMultiplier(real_multiplier, &data->output_multiplier,
                        &data->output_shift);
 
-    data->input_zero_point = input->params.zero_point;
     // Filter weights will always be symmetric quantized since we only support
    // int8 quantization. See
    // https://github.com/tensorflow/tensorflow/issues/44912 for additional
    // context.
     TFLITE_DCHECK(filter->params.zero_point == 0);
+
+    data->input_zero_point = input->params.zero_point;
     data->filter_zero_point = filter->params.zero_point;
     data->output_zero_point = output->params.zero_point;
 
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/gather_nd.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/gather_nd.cc
index 1f46dd1ef..3a02e8156 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/gather_nd.cc
+++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/gather_nd.cc
@@ -86,6 +86,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   // Assign to output the input type.
   output->type = params->type;
 
+  // The tensor output dims must be relocated
+  // from the FlatBuffer to the persistent storage arena.
+  TfLiteEvalTensor* output_eval =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+  TF_LITE_ENSURE_OK(context, tflite::micro::CreateWritableTensorDimsWithCopy(
+                                 context, output, output_eval));
+
   // TFLM gather_nd does not create the output tensor, but it needs to ensure
   // that the output shape is correct. The result shape is
   // indices.shape[:-1] + params.shape[indices.shape[-1]:]
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/kernel_util.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/kernel_util.cc
index 0499260f5..76031b872 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/kernel_util.cc
+++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/kernel_util.cc
@@ -16,6 +16,7 @@ limitations under the License.
#include "tensorflow/lite/micro/kernels/kernel_util.h" #include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" #include "tensorflow/lite/micro/memory_helpers.h" #include "tensorflow/lite/micro/micro_log.h" @@ -256,5 +257,24 @@ TfLiteStatus CopySubgraphOutputsToOpOutputs(TfLiteContext* context, return kTfLiteOk; } +TfLiteEvalTensor MakeUnpackedInt4Tensor(TfLiteContext* context, + int scratch_buffer_index, + const TfLiteEvalTensor* tensor) { + if (tensor->type != kTfLiteInt4) { + return *tensor; + } + + TfLiteEvalTensor new_tensor; + new_tensor.data.data = static_cast( + context->GetScratchBuffer(context, scratch_buffer_index)); + new_tensor.dims = tensor->dims; + new_tensor.type = kTfLiteInt8; + tflite::tensor_utils::UnpackDenseInt4IntoInt8( + tflite::micro::GetTensorData(tensor), + tflite::micro::GetTensorShape(tensor).FlatSize(), + tflite::micro::GetTensorData(&new_tensor)); + return new_tensor; +} + } // namespace micro } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/kernel_util.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/kernel_util.h index aa369605e..f30ae44c3 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/kernel_util.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/kernel_util.h @@ -131,6 +131,14 @@ TfLiteStatus CopySubgraphOutputsToOpOutputs(TfLiteContext* context, MicroGraph* graph_info, int subgraph_idx); +// If tensor is INT4, make a new TfLiteEvalTensor with data unpacked into +// a scratch buffer. The returned tensor will have the kTfLiteInt8 type. +// Assume scratch buffer is previously requested in Prepare, and +// scratch_buffer_index can be used to retrieve that buffer. +// If the tensor is not INT4, a shallow copy is returned. 
+TfLiteEvalTensor MakeUnpackedInt4Tensor(TfLiteContext* context,
+                                        int scratch_buffer_index,
+                                        const TfLiteEvalTensor* tensor);
 }  // namespace micro
 }  // namespace tflite
 
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/leaky_relu_common.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/leaky_relu_common.cc
index 7d3cb176f..3d1ffebb6 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/leaky_relu_common.cc
+++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/leaky_relu_common.cc
@@ -51,16 +51,15 @@ TfLiteStatus CalculateOpDataLeakyRelu(TfLiteContext* context,
     data->output_zero_point = output->params.zero_point;
 
     int output_shift_alpha;
-    double alpha_multiplier = static_cast<double>(input->params.scale) *
-                              static_cast<double>(params->alpha) /
-                              static_cast<double>(output->params.scale);
+    double alpha_multiplier = static_cast<double>(
+        input->params.scale * params->alpha / output->params.scale);
     QuantizeMultiplier(alpha_multiplier, &data->output_multiplier_alpha,
                        &output_shift_alpha);
     data->output_shift_alpha = static_cast<int32_t>(output_shift_alpha);
 
     int output_shift_identity;
-    double identity_multiplier = static_cast<double>(input->params.scale) /
-                                 static_cast<double>(output->params.scale);
+    double identity_multiplier =
+        static_cast<double>(input->params.scale / output->params.scale);
     QuantizeMultiplier(identity_multiplier, &data->output_multiplier_identity,
                        &output_shift_identity);
     data->output_shift_identity = static_cast<int32_t>(output_shift_identity);
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval.cc
index 555ecd724..4666e908a 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval.cc
+++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval.cc
@@ -1,4 +1,4 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -14,1446 +14,209 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/lite/micro/kernels/lstm_eval.h"
 
-#include <cmath>
-#include <cstdint>
-#include <cstring>
-#include <memory>
+#include <limits>
 
-#include "tensorflow/lite/c/builtin_op_data.h"
-#include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h"
+#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
 #include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/mul.h"
 #include "tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h"
-#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
-#include "tensorflow/lite/kernels/op_macros.h"
-#include "tensorflow/lite/micro/kernels/kernel_util.h"
-#include "tensorflow/lite/micro/kernels/micro_tensor_utils.h"
+#include "tensorflow/lite/kernels/internal/reference/logistic.h"
+#include "tensorflow/lite/kernels/internal/reference/mul.h"
+#include "tensorflow/lite/kernels/internal/reference/tanh.h"
+#include "tensorflow/lite/kernels/internal/types.h"
 
 namespace tflite {
 namespace lstm_internal {
 
-// Calculates a single LSTM gate.
-// -// Implements the following formula: (* is matrix multiply) -// gate = activate(W_input * input + W_aux * aux_input + -// W_peephole * cell + W_recurrent * prev_output + bias) -// with layer norm: -// gate = activate(W_norm * normalize(...) + bias) // not adding bias inside -// -// Activation is sigmoid except for the "cell" gate (configurable, usually tanh) -// -// Parameters: -// Input vectors (to LSTM): | Size: | Optional? -// input | n_input | -// aux_input | n_aux_input | y (bidir LSTM) -// Input vectors (persistent states): -// output_state | n_output | -// cell_state | n_cell | -// 'Constant' inputs: -// input_to_gate_weights | n_cell * n_input | -// aux_input_to_gate_weights | n_cell * n_aux_input | y (bidir LSTM) -// recurrent_to_gate_weights | n_cell * n_output | -// cell_to_gate_weights | n_cell | y (peephole) -// gate_bias | n_cell | -// layer_norm_coefficients | n_cell | y (layer norm) -// Output vector: -// gate | n_cell | -// Scalar parameters: -// n_batch - batch size / number of vectors -// n_input, n_aux_input, n_output, n_cell - size of vectors. -// activation - activation to use. -// is_input_all_zeros, is_aux_input_all_zeros - if input vectors are all zero. -// use_layer_norm - if doing layer norm LSTM. -void CalculateLstmGateFloat( - const float* input, const float* input_to_gate_weights, - const float* aux_input, const float* aux_input_to_gate_weights, - const float* output_state, const float* recurrent_to_gate_weights, - const float* cell_state, const float* cell_to_gate_weights, - const float* layer_norm_coefficients, const float* gate_bias, - const int n_batch, const int n_input, const int n_aux_input, - const int n_output, const int n_cell, - const TfLiteFusedActivation activation, float* gate, - const bool is_input_all_zeros, const bool is_aux_input_all_zeros) { - const bool use_peephole = (cell_to_gate_weights != nullptr); - const bool use_layer_norm = (layer_norm_coefficients != nullptr); - // Initialize scratch buffers with bias for regular lstm or initialize with - // zero for layer norm lstm. - if (use_layer_norm) { - memset(gate, 0, n_cell * n_batch * sizeof(float)); - } else { - tflite::tensor_utils::VectorBatchVectorAssign(gate_bias, n_cell, n_batch, - gate); - } - // For each batch and cell: compute input_weight * input. - // Skip if input is all zeros. - if (!is_input_all_zeros) { - tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_to_gate_weights, n_cell, n_input, input, n_batch, gate); - } - // For each batch and cell: compute aux_input_weight * aux_input. - // Skip if auxiliary input is not available or all zeros. - if (!is_aux_input_all_zeros) { - tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate( - aux_input_to_gate_weights, n_cell, n_aux_input, aux_input, n_batch, - gate); - } - // For each batch and cell: compute recurrent_weight * output_state. 
-  tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-      recurrent_to_gate_weights, n_cell, n_output, output_state, n_batch, gate);
-  // For each batch and cell: compute cell_weight .* cell_state (peephole LSTM)
-  if (use_peephole) {
-    tflite::tensor_utils::VectorBatchVectorCwiseProductAccumulate(
-        cell_to_gate_weights, n_cell, cell_state, n_batch, gate);
-  }
-  // Do layer normalization (if layer norm LSTM)
-  if (use_layer_norm) {
-    tflite::tensor_utils::MeanStddevNormalization(gate, gate, n_cell, n_batch);
-    tflite::tensor_utils::VectorBatchVectorCwiseProduct(
-        layer_norm_coefficients, n_cell, gate, n_batch, gate);
-    tflite::tensor_utils::VectorBatchVectorAdd(gate_bias, n_cell, n_batch,
-                                               gate);
-  }
-  // Apply activation
-  tflite::PortableApplyActivationToVector(gate, n_batch * n_cell, activation,
-                                          gate);
-}
-
-// Updates the LSTM cell state, used by both float and hybrid LSTM versions.
-//
-// Implements the following formula:
-//   cell_state_new = clip(forget_gate * cell_state + input_gate * cell_gate)
-//
-// With CIFG LSTM, input gate is replaced by (1-forget_gate).
-//
-// Parameters:
-//  - n_batch, n_cell: sizes of vectors
-//  - cell_state: input/output vector, size n_batch*n_cell
-//  - input_gate: input vector, size n_batch*n_cell.
-//  - forget_gate: input/scratch vector, size n_batch*n_cell, modified with CIFG
-//  - cell_gate: input vector, size n_batch*n_cell.
-//  - use_cifg: use 1-forget_gate instead of input_gate.
-//  - clip: if > 0, clip the resulting cell state to [-clip, +clip].
-void UpdateLstmCellFloat(int n_batch, int n_cell, float* cell_state,
-                         const float* input_gate, float* forget_gate,
-                         const float* cell_gate, bool use_cifg, float clip) {
-  tflite::tensor_utils::VectorVectorCwiseProduct(forget_gate, cell_state,
-                                                 n_batch * n_cell, cell_state);
-
-  if (use_cifg) {
-    // With CIFG, input_gate = 1-forget_gate. Use the forget_gate array as
-    // scratch, as input_gate array is not allocated in this case. (Be careful
-    // not to write to the scratch before reading the forget gate data.)
-    float* scratch = forget_gate;
-    tflite::tensor_utils::Sub1Vector(forget_gate, n_batch * n_cell, scratch);
-    tflite::tensor_utils::VectorVectorCwiseProductAccumulate(
-        cell_gate, scratch, n_batch * n_cell, cell_state);
-  } else {
-    tflite::tensor_utils::VectorVectorCwiseProductAccumulate(
-        cell_gate, input_gate, n_batch * n_cell, cell_state);
-  }
-  if (clip > 0.0f) {
-    tflite::tensor_utils::CwiseClipping(cell_state, n_batch * n_cell, clip);
+const int32_t kInt16Max = std::numeric_limits<int16_t>::max();
+const int32_t kInt16Min = std::numeric_limits<int16_t>::min();
+
+void AddElementWise(const int16_t* input_1, const int16_t* input_2, int n_batch,
+                    int n_input, int16_t* output) {
+  for (int batch = 0; batch < n_batch; ++batch) {
+    for (int i = 0; i < n_input; ++i) {
+      const int index = batch * n_input + i;
+      int32_t sum = input_1[index] + input_2[index];
+      const int32_t sum_clamped = std::min(kInt16Max, std::max(kInt16Min, sum));
+      output[index] = static_cast<int16_t>(sum_clamped);
+    }
   }
 }
-
-// Calculates the output state tensor of an LSTM step.
-//
-// Implements the following formula:
-//   output_no_projection = output_gate .* activate(cell_state)
-//                          (elementwise vector product)
-// If no projection is used:
-//   output = output_state = output_no_projection
-// With projection:
-//   output = output_state = clip(W*output_no_projection + bias)
-//
-// Output might not have a different 'stride' than n_batch, so we need to copy.
-// -// Parameters: -// - n_batch: batches: the number of distinct vectors in each array. -// - n_cell, n_output: sizes of vectors. -// - cell_state, output_gate: input vectors, size n_batch*n_cell. -// - projection_weights, projection_weights_scale, projection_bias: -// constant inputs, describing projection matrix and bias. -// - proj_clip: if > 0, clip the output of the projection. -// - output_state: output vector, size n_batch*n_output. Must be contigous. -// - scratch: scratch area, size n_batch*n_cell. -void CalculateLstmOutputFloat(int n_batch, int n_cell, int n_output, - const float* cell_state, const float* output_gate, - TfLiteFusedActivation activation, - const float* projection_weights, - const float* projection_bias, - const float proj_clip, float* output_state, - float* scratch) { - tflite::PortableApplyActivationToVector(cell_state, n_batch * n_cell, - activation, scratch); - tflite::tensor_utils::VectorVectorCwiseProduct(output_gate, scratch, - n_batch * n_cell, scratch); - - const bool use_projection = (projection_weights != nullptr); - const bool use_projection_bias = (projection_bias != nullptr); - - if (use_projection) { - if (use_projection_bias) { - tflite::tensor_utils::VectorBatchVectorAssign(projection_bias, n_output, - n_batch, output_state); - } else { - memset(output_state, 0, n_batch * n_output * sizeof(float)); +void AddElementWise(const float* input_1, const float* input_2, int n_batch, + int n_input, float* output) { + for (int batch = 0; batch < n_batch; ++batch) { + for (int i = 0; i < n_input; ++i) { + const int index = batch * n_input + i; + output[index] = input_1[index] + input_2[index]; } - tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate( - projection_weights, n_output, n_cell, scratch, n_batch, output_state); - if (proj_clip > 0.0f) { - tflite::tensor_utils::CwiseClipping(output_state, n_batch * n_output, - proj_clip); - } - } else { - std::memcpy(output_state, scratch, n_batch * n_output * sizeof(float)); } } -// Calculates a single LSTM gate, int8x8_16 version. -// Implements the same functionality as CalculateLstmGateFloat. -void CalculateLstmGateInteger8x8_16( - // Input and weights - const int8_t* input, const int8_t* input_to_gate_weights, - const int32_t* input_to_gate_bias, const int32_t input_to_gate_scale_a, - const int32_t input_to_gate_scale_b, - // Output state and weights - const int8_t* output_state, const int8_t* recurrent_to_gate_weights, - const int32_t* recurrent_to_gate_bias, - const int32_t recurrent_to_gate_scale_a, - const int32_t recurrent_to_gate_scale_b, - // Cell state and weights - const int16_t* cell_state, const int16_t* cell_to_gate_weights, - const int32_t cell_to_gate_scale_a, const int32_t cell_to_gate_scale_b, - // Layer normalization parameters (layer norm LSTM) - const int16_t* layer_norm_coefficients, const int32_t* layer_norm_bias, - const int32_t layer_norm_input_scale_a, - const int32_t layer_norm_input_scale_b, - const int32_t layer_norm_variance_guard, - // Array sizes - const int n_batch, const int n_input, const int n_output, const int n_cell, - const TfLiteFusedActivation activation, - // Output - int16_t* gate, - // Parameters for performance optimizations - // Scratch arrays - int32_t* scratch5) { - const bool use_peephole = (cell_to_gate_weights != nullptr); - const bool use_layer_norm = (layer_norm_coefficients != nullptr); - - // Initialize scratch buffers with zeros. Note that unlike float and hybrid - // versions, bias is only used in layer normalization. 
- memset(gate, 0, n_batch * n_cell * sizeof(int16_t)); - // For each batch and cell: compute input_weight * input. - tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input, input_to_gate_bias, input_to_gate_weights, input_to_gate_scale_a, - input_to_gate_scale_b, n_batch, n_input, n_cell, 0, scratch5, gate, - nullptr); - // Note: no aux_input. - // For each batch and cell: compute recurrent_weight * output_state. - tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate( - output_state, recurrent_to_gate_bias, recurrent_to_gate_weights, - recurrent_to_gate_scale_a, recurrent_to_gate_scale_b, n_batch, n_output, - n_cell, 0, scratch5, gate, nullptr); - // For each batch and cell: compute cell_weight * cell_state (peephole LSTM) - if (use_peephole) { - tflite::tensor_utils::VectorBatchVectorCwiseProductAccumulate( - cell_to_gate_weights, n_output, cell_state, n_batch, - cell_to_gate_scale_a, cell_to_gate_scale_b, gate); - } - // Do layer normalization (if layer norm LSTM) - if (use_layer_norm) { - tflite::tensor_utils::ApplyLayerNorm( - gate, layer_norm_coefficients, layer_norm_bias, - layer_norm_input_scale_a, layer_norm_input_scale_b, - layer_norm_variance_guard, n_batch, n_cell, gate); - } - - // Apply activation - switch (activation) { - case kTfLiteActSigmoid: - - reference_integer_ops::Logistic( - 0 /*data->input_multiplier*/, 0 /*data->input_left_shift */, - n_batch * n_cell /*NumElements(input->dims)*/, - gate /* tflite::micro::GetTensorData(input) */, - gate /*tflite::micro::GetTensorData(output) */); - - break; - case kTfLiteActTanh: { - int32_t dims_data = n_batch * n_cell; - RuntimeShape tanh_inp_shape = RuntimeShape(1, &dims_data); - reference_integer_ops::Tanh(0, 0, tanh_inp_shape, gate, tanh_inp_shape, - gate); - } break; - default: - // Only Sigmoid or Tanh is used. - TFLITE_ASSERT_FALSE; - } +void Sigmoid(const RuntimeShape& data_shape, int16_t* data) { + reference_integer_ops::Logistic( + 0 /*data->input_multiplier*/, 0 /*data->input_left_shift */, + data_shape.FlatSize() /*NumElements(input->dims)*/, + data /* tflite::micro::GetTensorData(input) */, + data /*tflite::micro::GetTensorData(output) */); } -// Updates the LSTM cell state, used by both integer LSTM versions. -// Also see UpdateLstmCellFloat. -// -// Parameters: -// - n_batch, n_cell: sizes of vectors -// - cell_state: input/output vector, size n_batch*n_cell -// - cell_state_scale: scaling factor of cell state. -// - input_gate: input vector, size n_batch*n_cell. -// - forget_gate: input/scratch vector, size n_batch*n_cell, always modified. -// - cell_gate: input vector, size n_batch*n_cell. -// - use_cifg: use 1-forget_gate instead of input_gate. -// - clip: if > 0, clip the resulting cell state to [-clip, +clip]. -void UpdateLstmCellInteger(int n_batch, int n_cell, int16_t* cell_state, - int32_t cell_state_scale, const int16_t* input_gate, - int16_t* forget_gate, const int16_t* cell_gate, - bool use_cifg, int16_t clip) { - // Use the forget_gate array as scratch, as input_gate array is not allocated - // in CIFG case. (Be careful not to write to the scratch before reading the - // forget gate data.) 
- int16_t* scratch = forget_gate; - - tflite::tensor_utils::CwiseMul(forget_gate, cell_state, n_batch, n_cell, 15, - cell_state); - if (use_cifg) { - tflite::tensor_utils::Sub1Vector(forget_gate, n_batch * n_cell, scratch); - tflite::tensor_utils::CwiseMul(scratch, cell_gate, n_batch, n_cell, - 30 + cell_state_scale, scratch); - } else { - tflite::tensor_utils::CwiseMul(input_gate, cell_gate, n_batch, n_cell, - 30 + cell_state_scale, scratch); - } - tflite::tensor_utils::CwiseAdd(cell_state, scratch, n_batch, n_cell, - cell_state); - - if (clip > 0) { - tflite::tensor_utils::CwiseClipping(cell_state, n_batch * n_cell, clip); - } +void Sigmoid(const RuntimeShape& data_shape, float* data) { + reference_ops::Logistic(data_shape, data, data_shape, data); } -// Calculates the output state tensor of an LSTM step. See Float and hybrid -// versions as well. -// -// Parameters: -// - n_batch: batches: the number of distinct vectors in each array. -// - n_cell, n_output: sizes of vectors. -// - cell_state, output_gate: input vectors, size n_batch*n_cell. -// - cell_state_scale: scaling of cell_state. -// - hidden_scale_[a|b]: effective scale of cell_state.*output_gate -// - hidden_zp: zero_point for cell_state.*output_gate -// - projection_weights, proj_scale_[a|b], projection_bias: -// constant inputs, describing projection matrix and bias. -// - output_state_zp: zero point of output_state. (Input, calibrated value.) -// - quantized_proj_clip: if > 0, clip the output of the projection. -// - output_state: output vector, size n_batch*n_output. Must be contigous. -// - scratch0: scratch area of size n_batch*n_cell -// - scratch1: scratch area of size n_batch*n_cell -// - scratch2: scratch area used by MatrixBatchVectorMultiplyAccumulate -void CalculateLstmOutputInteger8x8_16( - int n_batch, int n_cell, int n_output, int16_t* cell_state, - int32_t cell_state_scale, const int16_t* output_gate, - int32_t hidden_scale_a, int32_t hidden_scale_b, int32_t hidden_zp, - const int8_t* projection_weights, int32_t proj_scale_a, - int32_t proj_scale_b, const int32_t* projection_bias, - int32_t output_state_zp, int8_t quantized_proj_clip, int8_t* output_state, - int16_t* scratch0, int8_t* scratch1, int32_t* scratch2) { - // Note: unlike float/hybrid, the activation is always Tanh. 
- +void Tanh(int32_t cell_state_scale_power, const RuntimeShape& input_data_shape, + int16_t* input_data, const RuntimeShape& output_data_shape, + int16_t* output_data) { + int32_t tanh_input_left_shift = (15 + cell_state_scale_power) - 3; + if (tanh_input_left_shift < 0) /* handling negative shift value */ { - int32_t tanh_input_left_shift = (15 + cell_state_scale) - 3; - int32_t dims_data = n_batch * n_cell; - if (tanh_input_left_shift < 0) /* handling negative shift value */ - { - int32_t i; - tanh_input_left_shift = -tanh_input_left_shift; - for (i = 0; i < dims_data; i++) { - cell_state[i] = cell_state[i] >> tanh_input_left_shift; - } - tanh_input_left_shift = 0; + int32_t i; + tanh_input_left_shift = -tanh_input_left_shift; + for (i = 0; i < input_data_shape.FlatSize(); i++) { + input_data[i] = input_data[i] >> tanh_input_left_shift; } - RuntimeShape tanh_inp_shape = RuntimeShape(1, &dims_data); - reference_integer_ops::Tanh(0, tanh_input_left_shift, tanh_inp_shape, - cell_state, tanh_inp_shape, scratch0); - } - tflite::tensor_utils::CwiseMul(output_gate, scratch0, hidden_scale_a, - hidden_scale_b, n_batch, n_cell, hidden_zp, - scratch1); - - const bool use_projection = (projection_weights != nullptr); - - if (use_projection) { - // Note: no bias like in float/hybrid - memset(output_state, 0, n_batch * n_output * sizeof(int8_t)); - tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate( - scratch1, projection_bias, projection_weights, proj_scale_a, - proj_scale_b, n_batch, n_cell, n_output, output_state_zp, scratch2, - output_state, nullptr); - if (quantized_proj_clip > 0) { - tflite::tensor_utils::CwiseClipping(output_state, n_batch * n_output, - quantized_proj_clip); - } - } else { - std::memcpy(output_state, scratch1, n_batch * n_output * sizeof(int8_t)); + tanh_input_left_shift = 0; } + reference_integer_ops::Tanh(0, tanh_input_left_shift, input_data_shape, + input_data, output_data_shape, output_data); } -// Performs an LSTM batch inference step for input specified by input_ptr. -// The LSTM cell is specified by the pointers to its weights (*_weights_ptr) and -// biases (*_bias_ptr), and buffers (*_scratch), along with additional -// parameters: -// - params: various LSTM params including activation, clipping, etc., -// - n_batch: size of batch, -// - n_cell: number of cells (or units), -// - n_input: the input size, -// - n_aux_input: the auxiliary input size. -// - n_output: the output size. -// - output_batch_leading_dim: the leading dimension of the output buffer. -// -// Input of size 'n_batch * n_input': -// input_ptr -// Input of size 'n_batch * n_aux_input': -// aux_input_ptr - optional (can be nullptr) -// -// LSTM weights: -// Input weights of size 'n_cell * n_input': -// input_to_input_weights - optional -// input_to_forget_weights -// input_to_cell_weights -// input_to_output_weights -// Auxiliary input weights of size 'n_cell * n_aux_input': -// aux_input_to_input_weights - optional -// aux_input_to_forget_weights - optional -// aux_input_to_cell_weights - optional -// aux_input_to_output_weights - optional -// Recurrent weights of size 'n_cell * n_output': -// recurrent_to_input_weights - optional -// recurrent_to_forget_weights -// recurrent_to_cell_weights -// recurrent_to_input_weights -// Peephole weights of size 'n_cell', representing diagonal matrices. 
-// cell_to_input_weights - optional -// cell_to_cell_weights - optional -// cell_to_output_weights - optional -// Projection weights of size 'n_output * n_cell' -// projection_weights_ptr - optional -// Gate biases of size 'n_cell': -// input_gate_bias_ptr - optional -// forget_gate_bias_ptr -// cell_gate_bias_ptr -// output_gate_bias_ptr -// -// Layer norm coefficients of size 'n_cell', representing diagonal matrices. -// input_layer_norm_coefficients_ptr - optional -// forget_layer_norm_coefficients_ptr - optional -// cell_layer_norm_coefficients_ptr - optional -// output_layer_norm_coefficients_ptr - optional -// -// The pointers to the cell and output state and the output are updated. -// -// The pointers input_ptr, aux_input_ptr, and output_ptr point to data aligned -// in batch_major order, and each step processes batch_size many inputs from -// input_ptr, and updates batch_size many cell and output states. -// -// The output_batch_dim is output.shape[-1], i.e. the outermost dimension of the -// output tensor, and in most cases will be equal to n_output. It is usually not -// when we want to store the LSTM output into a slice of the output tensor, e.g. -// for bidirectional LSTMs with merge_outputs. In this case, the batched -// operations cannot be used since they assume that the batched outputs are -// contiguous, and we manually loop over the batched outputs. -void LstmStepFloat( - const float* input_ptr, const float* input_to_input_weights_ptr, - const float* input_to_forget_weights_ptr, - const float* input_to_cell_weights_ptr, - const float* input_to_output_weights_ptr, const float* aux_input_ptr, - const float* aux_input_to_input_weights_ptr, - const float* aux_input_to_forget_weights_ptr, - const float* aux_input_to_cell_weights_ptr, - const float* aux_input_to_output_weights_ptr, - const float* recurrent_to_input_weights_ptr, - const float* recurrent_to_forget_weights_ptr, - const float* recurrent_to_cell_weights_ptr, - const float* recurrent_to_output_weights_ptr, - const float* cell_to_input_weights_ptr, - const float* cell_to_forget_weights_ptr, - const float* cell_to_output_weights_ptr, - const float* input_layer_norm_coefficients_ptr, - const float* forget_layer_norm_coefficients_ptr, - const float* cell_layer_norm_coefficients_ptr, - const float* output_layer_norm_coefficients_ptr, - const float* input_gate_bias_ptr, const float* forget_gate_bias_ptr, - const float* cell_gate_bias_ptr, const float* output_gate_bias_ptr, - const float* projection_weights_ptr, const float* projection_bias_ptr, - const TfLiteLSTMParams* params, int n_batch, int n_cell, int n_input, - int n_aux_input, int n_output, int output_batch_leading_dim, - float* output_state_ptr, float* cell_state_ptr, float* scratch0, - float* scratch1, float* scratch2, float* scratch3, float* output_ptr) { - // Since we have already checked that weights are all there or none, we can - // check the existence of only one to the get the condition. - const bool use_cifg = (input_to_input_weights_ptr == nullptr); +void Tanh(int32_t cell_state_scale_power, const RuntimeShape& input_data_shape, + float* input_data, const RuntimeShape& output_data_shape, + float* output_data) { + reference_ops::Tanh(input_data_shape, input_data, output_data_shape, + output_data); +} - // Make named scratch buffers. 
- float* input_gate_scratch = scratch0; - float* forget_gate_scratch = scratch1; - float* cell_gate_scratch = scratch2; - float* output_gate_scratch = scratch3; +// Input and output have the same shape in LSTM +void Mul(const RuntimeShape& shape, const ArithmeticParams& params, + const int16_t* input1_data, const int16_t* input2_data, + int8_t* output_data) { + return reference_integer_ops::MulElementwise( + shape.FlatSize(), params, input1_data, input2_data, output_data); +} - // Check if inputs are all zeros so we can skip some computations. - const bool is_input_all_zeros = - tflite::tensor_utils::IsZeroVector(input_ptr, n_batch * n_input); - const bool is_aux_input_all_zeros = - (aux_input_ptr == nullptr || tflite::tensor_utils::IsZeroVector( - aux_input_ptr, n_batch * n_aux_input)); - if (!use_cifg) { - // Calculate the input gate. (If not CIFG.) - lstm_internal::CalculateLstmGateFloat( - input_ptr, input_to_input_weights_ptr, aux_input_ptr, - aux_input_to_input_weights_ptr, output_state_ptr, - recurrent_to_input_weights_ptr, cell_state_ptr, - cell_to_input_weights_ptr, input_layer_norm_coefficients_ptr, - input_gate_bias_ptr, n_batch, n_input, n_aux_input, n_output, n_cell, - /*activation=*/kTfLiteActSigmoid, input_gate_scratch, - is_input_all_zeros, is_aux_input_all_zeros); - } - // Calculate the forget gate. - lstm_internal::CalculateLstmGateFloat( - input_ptr, input_to_forget_weights_ptr, aux_input_ptr, - aux_input_to_forget_weights_ptr, output_state_ptr, - recurrent_to_forget_weights_ptr, cell_state_ptr, - cell_to_forget_weights_ptr, forget_layer_norm_coefficients_ptr, - forget_gate_bias_ptr, n_batch, n_input, n_aux_input, n_output, n_cell, - /*activation=*/kTfLiteActSigmoid, forget_gate_scratch, is_input_all_zeros, - is_aux_input_all_zeros); - // Calculate the cell update gate. - lstm_internal::CalculateLstmGateFloat( - input_ptr, input_to_cell_weights_ptr, aux_input_ptr, - aux_input_to_cell_weights_ptr, output_state_ptr, - recurrent_to_cell_weights_ptr, /*cell_state=*/nullptr, - /*cell_to_gate_weights=*/nullptr, cell_layer_norm_coefficients_ptr, - cell_gate_bias_ptr, n_batch, n_input, n_aux_input, n_output, n_cell, - params->activation, cell_gate_scratch, is_input_all_zeros, - is_aux_input_all_zeros); - // Update the cell state. - lstm_internal::UpdateLstmCellFloat( - n_batch, n_cell, cell_state_ptr, input_gate_scratch, forget_gate_scratch, - cell_gate_scratch, use_cifg, params->cell_clip); - // Calculate output gate. - lstm_internal::CalculateLstmGateFloat( - input_ptr, input_to_output_weights_ptr, aux_input_ptr, - aux_input_to_output_weights_ptr, output_state_ptr, - recurrent_to_output_weights_ptr, cell_state_ptr, - cell_to_output_weights_ptr, output_layer_norm_coefficients_ptr, - output_gate_bias_ptr, n_batch, n_input, n_aux_input, n_output, n_cell, - /*activation=*/kTfLiteActSigmoid, output_gate_scratch, is_input_all_zeros, - is_aux_input_all_zeros); - // Update the output state. - lstm_internal::CalculateLstmOutputFloat( - n_batch, n_cell, n_output, cell_state_ptr, output_gate_scratch, - params->activation, projection_weights_ptr, projection_bias_ptr, - params->proj_clip, output_state_ptr, scratch2); - // Copy output state to the output. Note that the output's rows may not be - // contiguous (output_batch_leading_dim != n_output). 
- for (int b = 0; b < n_batch; b++) { - std::memcpy(output_ptr + b * output_batch_leading_dim, - output_state_ptr + b * n_output, n_output * sizeof(float)); - } +// Input and output have the same shape in LSTM +void Mul(const RuntimeShape& shape, const ArithmeticParams& params, + const int16_t* input1_data, const int16_t* input2_data, + int16_t* output_data) { + return reference_integer_ops::MulElementwise( + shape.FlatSize(), params, input1_data, input2_data, output_data); } -// Fully quantized lstm kernel for 16 bit gate matmul output. -// -// Input tensor of size n_batch * n_input: -// input_ptr -// -// LSTM weights: -// Quantized input weights of size 'n_cell * n_input': -// input_to_input_weight_ptr - optional -// input_to_forget_weight_ptr - optional -// input_to_cell_weight_ptr - optional -// input_to_output_weight_ptr - optional -// -// Quantized recurrent weights of size 'n_cell * n_output': -// recurrent_to_input_weight_ptr - optional -// recurrent_to_forget_weights_ptr -// recurrent_to_cell_weights_ptr -// recurrent_to_input_weights_ptr -// -// Quantized peephole weights of size 'n_cell', representing diagonal matrices. -// cell_to_input_weights - optional -// cell_to_cell_weights - optional -// cell_to_output_weights - optional -// -// Quantized projection weights of size 'n_output * n_cell' -// projection_weight_ptr - optional -// -// Weight scales (scalars) for each of the weights above. -// effective_input_to_input_scale_a - optional -// effective_input_to_input_scale_b - optional -// effective_input_to_forget_scale_a -// effective_input_to_forget_scale_b -// effective_input_to_cell_scale_a -// effective_input_to_cell_scale_b -// effective_input_to_output_scale_a -// effective_input_to_output_scale_b -// effective_recurrent_to_input_scale_a - optional -// effective_recurrent_to_input_scale_b - optional -// effective_recurrent_to_forget_scale_a -// effective_recurrent_to_forget_scale_b -// effective_recurrent_to_cell_scale_a -// effective_recurrent_to_cell_scale_b -// effective_recurrent_to_output_scale_a -// effective_recurrent_to_output_scale_b -// effective_proj_scale_a - optional -// effective_proj_scale_b - optional -// -// Gate biases of size 'n_cell': -// input_gate_bias_ptr - optional -// forget_gate_bias_ptr -// cell_gate_bias_ptr -// output_gate_bias_ptr -// -// Layer norm coefficients of size 'n_cell', representing diagonal matrices. -// layer_norm_input_weight_ptr - optional -// layer_norm_forget_weight_ptr - optional -// layer_norm_cell_weight_ptr - optional -// layer_norm_output_weight_ptr - optional -// -// Layer norm scales of size 'n_cell'. -// layer_norm_input_scale_a - optional -// layer_norm_input_scale_b - optional -// layer_norm_forget_scale_a - optional -// layer_norm_forget_scale_b - optional -// layer_norm_cell_scale_a - optional -// layer_norm_cell_scale_b - optional -// layer_norm_output_scale_a - optional -// layer_norm_output_scale_b - optional -// -// Scalar values: -// quantized_cell_clip: quantized clip value for cell. -// quantized_proj_clip: quantized clip value for projection. -// cell_state_scale: the power of two scale for cell state. -// -// Zero points: -// output_state_zp: zero point of output state -// hidden_zp: zero point for hidden state. -// -// Temporary pre-allocated storage for the calculation. Each is of size n_cell * -// n_batch. -// scratch0 -// scratch1 -// scratch2 -// scratch3 -// scratch4 -// scratch5: this scratch buffer is created purely for optimizing the -// MatrixBatchVectorMultiplyAccumulate. 
-// -// Outputs: -// output_state_ptr - size 'n_batch * n_output' -// cell_state_ptr - size 'n_batch * n_cell' -// output_ptr - size 'n_batch * n_output' -// TODO(b/159947023): scratch0 is not used if (!cifg). Don't allocate then. -void LstmStepInteger8x8_16( - const int8_t* input_ptr, const int8_t* input_to_input_weight_ptr, - int32_t effective_input_to_input_scale_a, - int32_t effective_input_to_input_scale_b, - const int8_t* input_to_forget_weight_ptr, - int32_t effective_input_to_forget_scale_a, - int32_t effective_input_to_forget_scale_b, - const int8_t* input_to_cell_weight_ptr, - int32_t effective_input_to_cell_scale_a, - int32_t effective_input_to_cell_scale_b, - const int8_t* input_to_output_weight_ptr, - int32_t effective_input_to_output_scale_a, - int32_t effective_input_to_output_scale_b, - const int8_t* recurrent_to_input_weight_ptr, - int32_t effective_recurrent_to_input_scale_a, - int32_t effective_recurrent_to_input_scale_b, - const int8_t* recurrent_to_forget_weight_ptr, - int32_t effective_recurrent_to_forget_scale_a, - int32_t effective_recurrent_to_forget_scale_b, - const int8_t* recurrent_to_cell_weight_ptr, - int32_t effective_recurrent_to_cell_scale_a, - int32_t effective_recurrent_to_cell_scale_b, - const int8_t* recurrent_to_output_weight_ptr, - int32_t effective_recurrent_to_output_scale_a, - int32_t effective_recurrent_to_output_scale_b, - const int16_t* cell_to_input_weight_ptr, - int32_t effective_cell_to_input_scale_a, - int32_t effective_cell_to_input_scale_b, - const int16_t* cell_to_forget_weight_ptr, - int32_t effective_cell_to_forget_scale_a, - int32_t effective_cell_to_forget_scale_b, - const int16_t* cell_to_output_weight_ptr, - int32_t effective_cell_to_output_scale_a, - int32_t effective_cell_to_output_scale_b, - const int8_t* projection_weight_ptr, int32_t effective_proj_scale_a, - int32_t effective_proj_scale_b, int32_t hidden_zp, - int32_t effective_hidden_scale_a, int32_t effective_hidden_scale_b, - const int16_t* layer_norm_input_weight_ptr, - int32_t layer_norm_input_scale_a, int32_t layer_norm_input_scale_b, - const int16_t* layer_norm_forget_weight_ptr, - int32_t layer_norm_forget_scale_a, int32_t layer_norm_forget_scale_b, - const int16_t* layer_norm_cell_weight_ptr, int32_t layer_norm_cell_scale_a, - int32_t layer_norm_cell_scale_b, - const int16_t* layer_norm_output_weight_ptr, - int32_t layer_norm_output_scale_a, int32_t layer_norm_output_scale_b, - const int32_t* input_gate_bias_ptr, const int32_t* forget_gate_bias_ptr, - const int32_t* cell_gate_bias_ptr, const int32_t* output_gate_bias_ptr, - int16_t quantized_cell_clip, int8_t quantized_proj_clip, - int32_t cell_state_scale, int32_t input_variance_guard, - int32_t forget_variance_guard, int32_t cell_variance_guard, - int32_t output_variance_guard, - const int32_t* input_to_forget_effective_bias, - const int32_t* recurrent_to_forget_effective_bias, - const int32_t* input_to_cell_effective_bias, - const int32_t* recurrent_to_cell_effective_bias, - const int32_t* input_to_output_effective_bias, - const int32_t* recurrent_to_output_effective_bias, - const int32_t* input_to_input_effective_bias, - const int32_t* recurrent_to_input_effective_bias, - const int32_t* projection_effective_bias, int n_batch, int n_cell, - int n_input, int n_output, int8_t* output_state_ptr, - int32_t output_state_zp, int16_t* cell_state_ptr, int8_t* output_ptr, - int16_t* scratch0, int16_t* scratch1, int16_t* scratch2, int16_t* scratch3, - int8_t* scratch4, int32_t* scratch5) { - // Make named scratch buffers for 
the different gates. - int16_t* input_gate_scratch = scratch0; - int16_t* forget_gate_scratch = scratch1; - int16_t* cell_gate_scratch = scratch2; - int16_t* output_gate_scratch = scratch3; +// Input and output have the same shape in LSTM +void Mul(const RuntimeShape& shape, const ArithmeticParams& params, + const float* input1_data, const float* input2_data, + float* output_data) { + return reference_ops::Mul(params, shape, input1_data, shape, input2_data, + shape, output_data); +} - // Since we have already checked that weights are all there or none, we - // can check the existence of only one to the get the condition. - const bool use_cifg = (input_to_input_weight_ptr == nullptr); +void FullyConnected(const FullyConnectedParams& params, + const RuntimeShape& input_shape, const int8_t* input_data, + const RuntimeShape& filter_shape, const int8_t* filter_data, + const RuntimeShape& bias_shape, const int32_t* bias_data, + const RuntimeShape& output_shape, int16_t* output_data) { + return tflite::reference_integer_ops::FullyConnected( + params, input_shape, input_data, filter_shape, filter_data, bias_shape, + bias_data, output_shape, output_data); +} - // Check for nullptrs. - TFLITE_DCHECK(input_to_forget_effective_bias); - TFLITE_DCHECK(recurrent_to_forget_effective_bias); - TFLITE_DCHECK(input_to_cell_effective_bias); - TFLITE_DCHECK(recurrent_to_cell_effective_bias); - TFLITE_DCHECK(input_to_output_effective_bias); - TFLITE_DCHECK(recurrent_to_output_effective_bias); - if (!use_cifg) { - TFLITE_DCHECK(input_to_input_effective_bias); - TFLITE_DCHECK(recurrent_to_input_effective_bias); - } - const bool use_projection = (projection_weight_ptr != nullptr); - if (use_projection) { - TFLITE_DCHECK(projection_effective_bias); - } - if (!use_cifg) { - // Calculate the input gate. (If not CIFG.) - lstm_internal::CalculateLstmGateInteger8x8_16( - input_ptr, input_to_input_weight_ptr, input_to_input_effective_bias, - effective_input_to_input_scale_a, effective_input_to_input_scale_b, - output_state_ptr, recurrent_to_input_weight_ptr, - recurrent_to_input_effective_bias, effective_recurrent_to_input_scale_a, - effective_recurrent_to_input_scale_b, cell_state_ptr, - cell_to_input_weight_ptr, effective_cell_to_input_scale_a, - effective_cell_to_input_scale_b, layer_norm_input_weight_ptr, - input_gate_bias_ptr, layer_norm_input_scale_a, layer_norm_input_scale_b, - input_variance_guard, n_batch, n_input, n_output, n_cell, - kTfLiteActSigmoid, input_gate_scratch, scratch5); - } - // Calculate the forget gate. - lstm_internal::CalculateLstmGateInteger8x8_16( - input_ptr, input_to_forget_weight_ptr, input_to_forget_effective_bias, - effective_input_to_forget_scale_a, effective_input_to_forget_scale_b, - output_state_ptr, recurrent_to_forget_weight_ptr, - recurrent_to_forget_effective_bias, effective_recurrent_to_forget_scale_a, - effective_recurrent_to_forget_scale_b, cell_state_ptr, - cell_to_forget_weight_ptr, effective_cell_to_forget_scale_a, - effective_cell_to_forget_scale_b, layer_norm_forget_weight_ptr, - forget_gate_bias_ptr, layer_norm_forget_scale_a, - layer_norm_forget_scale_b, forget_variance_guard, n_batch, n_input, - n_output, n_cell, kTfLiteActSigmoid, forget_gate_scratch, scratch5); - // Calculate the cell update gate. 
- lstm_internal::CalculateLstmGateInteger8x8_16( - input_ptr, input_to_cell_weight_ptr, input_to_cell_effective_bias, - effective_input_to_cell_scale_a, effective_input_to_cell_scale_b, - output_state_ptr, recurrent_to_cell_weight_ptr, - recurrent_to_cell_effective_bias, effective_recurrent_to_cell_scale_a, - effective_recurrent_to_cell_scale_b, cell_state_ptr, - /*cell_to_gate_weights=*/nullptr, /*cell_to_gate_scale_a=*/0, - /*cell_to_gate_scale_b=*/0, layer_norm_cell_weight_ptr, - cell_gate_bias_ptr, layer_norm_cell_scale_a, layer_norm_cell_scale_b, - cell_variance_guard, n_batch, n_input, n_output, n_cell, kTfLiteActTanh, - cell_gate_scratch, scratch5); - // Update the cell state. - lstm_internal::UpdateLstmCellInteger( - n_batch, n_cell, cell_state_ptr, cell_state_scale, input_gate_scratch, - forget_gate_scratch, cell_gate_scratch, use_cifg, quantized_cell_clip); - // Calculate the output gate. - lstm_internal::CalculateLstmGateInteger8x8_16( - input_ptr, input_to_output_weight_ptr, input_to_output_effective_bias, - effective_input_to_output_scale_a, effective_input_to_output_scale_b, - output_state_ptr, recurrent_to_output_weight_ptr, - recurrent_to_output_effective_bias, effective_recurrent_to_output_scale_a, - effective_recurrent_to_output_scale_b, cell_state_ptr, - cell_to_output_weight_ptr, effective_cell_to_output_scale_a, - effective_cell_to_output_scale_b, layer_norm_output_weight_ptr, - output_gate_bias_ptr, layer_norm_output_scale_a, - layer_norm_output_scale_b, output_variance_guard, n_batch, n_input, - n_output, n_cell, kTfLiteActSigmoid, output_gate_scratch, scratch5); - // Update the output state. - lstm_internal::CalculateLstmOutputInteger8x8_16( - n_batch, n_cell, n_output, cell_state_ptr, cell_state_scale, - output_gate_scratch, effective_hidden_scale_a, effective_hidden_scale_b, - hidden_zp, projection_weight_ptr, effective_proj_scale_a, - effective_proj_scale_b, projection_effective_bias, output_state_zp, - quantized_proj_clip, output_state_ptr, scratch0, scratch4, scratch5); - // Copy output state to the output. Note that unlike float or hybrid, output - // is always contiguous. 
-  std::memcpy(output_ptr, output_state_ptr,
-              n_batch * n_output * sizeof(int8_t));
+void FullyConnected(const FullyConnectedParams& params,
+                    const RuntimeShape& input_shape, const int16_t* input_data,
+                    const RuntimeShape& filter_shape, const int8_t* filter_data,
+                    const RuntimeShape& bias_shape, const int64_t* bias_data,
+                    const RuntimeShape& output_shape, int16_t* output_data) {
+  return tflite::reference_integer_ops::FullyConnected(
+      params, input_shape, input_data, filter_shape, filter_data, bias_shape,
+      bias_data, output_shape, output_data);
 }
-} // namespace lstm_internal
+void FullyConnected(const FullyConnectedParams& params,
+                    const RuntimeShape& input_shape, const float* input_data,
+                    const RuntimeShape& filter_shape, const float* filter_data,
+                    const RuntimeShape& bias_shape, const float* bias_data,
+                    const RuntimeShape& output_shape, float* output_data) {
+  return tflite::reference_ops::FullyConnected(
+      params, input_shape, input_data, filter_shape, filter_data, bias_shape,
+      bias_data, output_shape, output_data);
+}
-TfLiteStatus EvalFloatLstm(
-    const TfLiteEvalTensor* input,
-    const TfLiteEvalTensor* input_to_input_weights,
-    const TfLiteEvalTensor* input_to_forget_weights,
-    const TfLiteEvalTensor* input_to_cell_weights,
-    const TfLiteEvalTensor* input_to_output_weights,
-    const TfLiteEvalTensor* recurrent_to_input_weights,
-    const TfLiteEvalTensor* recurrent_to_forget_weights,
-    const TfLiteEvalTensor* recurrent_to_cell_weights,
-    const TfLiteEvalTensor* recurrent_to_output_weights,
-    const TfLiteEvalTensor* cell_to_input_weights,
-    const TfLiteEvalTensor* cell_to_forget_weights,
-    const TfLiteEvalTensor* cell_to_output_weights,
-    const TfLiteEvalTensor* input_layer_norm_coefficients,
-    const TfLiteEvalTensor* forget_layer_norm_coefficients,
-    const TfLiteEvalTensor* cell_layer_norm_coefficients,
-    const TfLiteEvalTensor* output_layer_norm_coefficients,
-    const TfLiteEvalTensor* aux_input,
-    const TfLiteEvalTensor* aux_input_to_input_weights,
-    const TfLiteEvalTensor* aux_input_to_forget_weights,
-    const TfLiteEvalTensor* aux_input_to_cell_weights,
-    const TfLiteEvalTensor* aux_input_to_output_weights,
-    const TfLiteEvalTensor* input_gate_bias,
-    const TfLiteEvalTensor* forget_gate_bias,
-    const TfLiteEvalTensor* cell_gate_bias,
-    const TfLiteEvalTensor* output_gate_bias,
-    const TfLiteEvalTensor* projection_weights,
-    const TfLiteEvalTensor* projection_bias, const TfLiteLSTMParams* params,
-    bool forward_sequence, bool time_major, int output_offset,
-    float* scratch_buffer, TfLiteEvalTensor* output_state,
-    TfLiteEvalTensor* cell_state, TfLiteEvalTensor* output) {
-  TFLITE_DCHECK(input->dims->size >= 2 && input->dims->size <= 3);
-  int max_time, n_batch;
-  if (input->dims->size == 3) {
-    max_time = (time_major) ? input->dims->data[0] : input->dims->data[1];
-    n_batch = (time_major) ? input->dims->data[1] : input->dims->data[0];
-  } else {
-    max_time = 1;
-    n_batch = input->dims->data[0];
+void Clipping(const int v_size, const CellStateInfo& cell_state_info,
+              int16_t* vector) {
+  for (int i = 0; i < v_size; i++) {
+    vector[i] =
+        std::max(std::min(cell_state_info.quantized_cell_clip, vector[i]),
+                 static_cast<int16_t>(-cell_state_info.quantized_cell_clip));
   }
-  const int n_input = input->dims->data[input->dims->size - 1];
-  const int aux_input_size =
-      (aux_input) ? aux_input->dims->data[aux_input->dims->size - 1] : 0;
-
-  // n_cell and n_output will be the same size when there is no projection.
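For illustration, a standalone sketch of the symmetric clamp that the Clipping overloads above apply; the helper name and the example values are hypothetical, not part of the patch itself:

#include <algorithm>
#include <cstdint>

// Clamp every element of v to the range [-clip, clip], as in
// lstm_internal::Clipping above.
void ClampToCellClip(int16_t* v, int n, int16_t clip) {
  for (int i = 0; i < n; ++i) {
    v[i] = std::max(std::min(clip, v[i]), static_cast<int16_t>(-clip));
  }
}
// Example: with clip = 300, {-500, 20, 700} becomes {-300, 20, 300}.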
- const int n_cell = input_to_output_weights->dims->data[0]; - const int n_output = recurrent_to_output_weights->dims->data[1]; - - // Since we have already checked that weights are all there or none, we can - // check the existence of only one to the get the condition. - const bool use_cifg = (input_to_input_weights == nullptr); +} - // Index the scratch buffers pointers to the global scratch buffer. - float* input_gate_scratch = nullptr; - float* cell_gate_scratch = nullptr; - float* forget_gate_scratch = nullptr; - float* output_gate_scratch = nullptr; - if (use_cifg) { - cell_gate_scratch = scratch_buffer; - forget_gate_scratch = scratch_buffer + n_cell * n_batch; - output_gate_scratch = scratch_buffer + 2 * n_cell * n_batch; - } else { - input_gate_scratch = scratch_buffer; - cell_gate_scratch = scratch_buffer + n_cell * n_batch; - forget_gate_scratch = scratch_buffer + 2 * n_cell * n_batch; - output_gate_scratch = scratch_buffer + 3 * n_cell * n_batch; +void Clipping(const int v_size, const CellStateInfo& cell_state_info, + float* vector) { + for (int i = 0; i < v_size; i++) { + vector[i] = std::max(std::min(cell_state_info.cell_clip, vector[i]), + -cell_state_info.cell_clip); } +} - const int output_batch_leading_dim = - output->dims->data[output->dims->size - 1]; - if (time_major) { - // Loop through the sequence. - const int input_step = n_batch * n_input; - const int output_step = n_batch * output_batch_leading_dim; - for (int t = 0; t < max_time; t++) { - // If this is the forward_sequence, step forward, otherwise step - // backwards. - const int t_rel = forward_sequence ? t : max_time - t - 1; - const float* input_ptr = - tflite::micro::GetTensorData(input) + t_rel * input_step; - const float* aux_input_ptr = nullptr; - if (aux_input) { - aux_input_ptr = - tflite::micro::GetTensorData(aux_input) + t_rel * input_step; - } - float* output_ptr = tflite::micro::GetTensorData(output) + - t_rel * output_step + output_offset; - - lstm_internal::LstmStepFloat( - input_ptr, - input_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_input_weights), - input_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_forget_weights), - input_to_cell_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_cell_weights), - input_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_output_weights), - aux_input_ptr, - aux_input_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(aux_input_to_input_weights), - aux_input_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - aux_input_to_forget_weights), - aux_input_to_cell_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(aux_input_to_cell_weights), - aux_input_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - aux_input_to_output_weights), - recurrent_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(recurrent_to_input_weights), - recurrent_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_forget_weights), - recurrent_to_cell_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(recurrent_to_cell_weights), - recurrent_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_output_weights), - cell_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_to_input_weights), - cell_to_forget_weights == nullptr - ? 
nullptr - : tflite::micro::GetTensorData(cell_to_forget_weights), - cell_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_to_output_weights), - input_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - input_layer_norm_coefficients), - forget_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - forget_layer_norm_coefficients), - cell_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - cell_layer_norm_coefficients), - output_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - output_layer_norm_coefficients), - input_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_gate_bias), - forget_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(forget_gate_bias), - cell_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_gate_bias), - output_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(output_gate_bias), - projection_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(projection_weights), - projection_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(projection_bias), - params, n_batch, n_cell, n_input, aux_input_size, n_output, - output_batch_leading_dim, - tflite::micro::GetTensorData(output_state), - tflite::micro::GetTensorData(cell_state), input_gate_scratch, - forget_gate_scratch, cell_gate_scratch, output_gate_scratch, - output_ptr); - } - } else { - for (int b = 0; b < n_batch; b++) { - const int input_step = n_input; - const int output_step = output_batch_leading_dim; - for (int t = 0; t < max_time; t++) { - // If this is the forward_sequence, step forward, otherwise step - // backwards. - const int t_rel = forward_sequence ? t : max_time - t - 1; - const int time_offset = b * max_time + t_rel; - const float* input_ptr = tflite::micro::GetTensorData(input) + - time_offset * input_step; - const float* aux_input_ptr = nullptr; - if (aux_input) { - aux_input_ptr = tflite::micro::GetTensorData(aux_input) + - time_offset * input_step; - } - float* output_ptr = tflite::micro::GetTensorData(output) + - time_offset * output_step + output_offset; - - // Offset the {output,cell}_state pointers to the right batch. - float* output_state_ptr = - tflite::micro::GetTensorData(output_state) + - b * output_batch_leading_dim; - float* cell_state_ptr = - tflite::micro::GetTensorData(cell_state) + b * n_cell; - // Offset the scratch pointers to the right batch. - float* input_gate_scratch_ptr = - input_gate_scratch ? 
input_gate_scratch + b * n_cell : nullptr;
-      float* forget_gate_scratch_ptr = forget_gate_scratch + b * n_cell;
-      float* cell_gate_scratch_ptr = cell_gate_scratch + b * n_cell;
-      float* output_gate_scratch_ptr = output_gate_scratch + b * n_cell;
+// Increment the data offset so the single time step invocation can access
+// the corresponding input/output tensor data at the time step
+void LstmStepManager::UpdateTime() {
+  current_time_ += 1;
+  TFLITE_DCHECK_LE(current_time_, size_info_.time_steps);
+  // default as one batch per inference
+  int input_step = size_info_.input_dimension;
+  int output_step = size_info_.state_dimension;
+  // time major: batch inference
+  if (size_info_.time_major) {
+    input_step = input_step * size_info_.batch_size;
+    output_step = output_step * size_info_.batch_size;
+  }
+
+  input_offset_ += input_step;
+  output_offset_ += output_step;
+}
-      lstm_internal::LstmStepFloat(
-          input_ptr,
-          input_to_input_weights == nullptr
-              ? nullptr
-              : tflite::micro::GetTensorData(input_to_input_weights),
-          input_to_forget_weights == nullptr
-              ? nullptr
-              : tflite::micro::GetTensorData(input_to_forget_weights),
-          input_to_cell_weights == nullptr
-              ? nullptr
-              : tflite::micro::GetTensorData(input_to_cell_weights),
-          input_to_output_weights == nullptr
-              ? nullptr
-              : tflite::micro::GetTensorData(input_to_output_weights),
-          aux_input_ptr,
-          aux_input_to_input_weights == nullptr
-              ? nullptr
-              : tflite::micro::GetTensorData(
-                    aux_input_to_input_weights),
-          aux_input_to_forget_weights == nullptr
-              ? nullptr
-              : tflite::micro::GetTensorData(
-                    aux_input_to_forget_weights),
-          aux_input_to_cell_weights == nullptr
-              ? nullptr
-              : tflite::micro::GetTensorData(
-                    aux_input_to_cell_weights),
-          aux_input_to_output_weights == nullptr
-              ? nullptr
-              : tflite::micro::GetTensorData(
-                    aux_input_to_output_weights),
-          recurrent_to_input_weights == nullptr
-              ? nullptr
-              : tflite::micro::GetTensorData(
-                    recurrent_to_input_weights),
-          recurrent_to_forget_weights == nullptr
-              ? nullptr
-              : tflite::micro::GetTensorData(
-                    recurrent_to_forget_weights),
-          recurrent_to_cell_weights == nullptr
-              ? nullptr
-              : tflite::micro::GetTensorData(
-                    recurrent_to_cell_weights),
-          recurrent_to_output_weights == nullptr
-              ? nullptr
-              : tflite::micro::GetTensorData(
-                    recurrent_to_output_weights),
-          cell_to_input_weights == nullptr
-              ? nullptr
-              : tflite::micro::GetTensorData(cell_to_input_weights),
-          cell_to_forget_weights == nullptr
-              ? nullptr
-              : tflite::micro::GetTensorData(cell_to_forget_weights),
-          cell_to_output_weights == nullptr
-              ? nullptr
-              : tflite::micro::GetTensorData(cell_to_output_weights),
-          input_layer_norm_coefficients == nullptr
-              ? nullptr
-              : tflite::micro::GetTensorData(
-                    input_layer_norm_coefficients),
-          forget_layer_norm_coefficients == nullptr
-              ? nullptr
-              : tflite::micro::GetTensorData(
-                    forget_layer_norm_coefficients),
-          cell_layer_norm_coefficients == nullptr
-              ? nullptr
-              : tflite::micro::GetTensorData(
-                    cell_layer_norm_coefficients),
-          output_layer_norm_coefficients == nullptr
-              ? nullptr
-              : tflite::micro::GetTensorData(
-                    output_layer_norm_coefficients),
-          input_gate_bias == nullptr
-              ? nullptr
-              : tflite::micro::GetTensorData(input_gate_bias),
-          forget_gate_bias == nullptr
-              ? nullptr
-              : tflite::micro::GetTensorData(forget_gate_bias),
-          cell_gate_bias == nullptr
-              ? nullptr
-              : tflite::micro::GetTensorData(cell_gate_bias),
-          output_gate_bias == nullptr
-              ? nullptr
-              : tflite::micro::GetTensorData(output_gate_bias),
-          projection_weights == nullptr
-              ? nullptr
-              : tflite::micro::GetTensorData(projection_weights),
-          projection_bias == nullptr
-              ? nullptr
-              : tflite::micro::GetTensorData(projection_bias),
-          params,
-          /*n_batch=*/1, n_cell, n_input, aux_input_size, n_output,
-          output_batch_leading_dim, output_state_ptr, cell_state_ptr,
-          input_gate_scratch_ptr, forget_gate_scratch_ptr,
-          cell_gate_scratch_ptr, output_gate_scratch_ptr, output_ptr);
-      }
-    }
-  }
-  return kTfLiteOk;
+// Increment the data offset so the single time step invocation can access
+// the corresponding hidden/cell state tensor data at the time step (for single
+// batch inference only)
+void LstmStepManager::UpdateBatch() {
+  current_batch_ += 1;
+  TFLITE_DCHECK_LE(current_batch_, size_info_.batch_size);
+  // batch inference for time major: no action needed
+  if (size_info_.time_major) {
+    return;
+  }
+  // otherwise: single batch inference, go to the next batch
+  hidden_state_offset_ += size_info_.state_dimension;
+  cell_state_offset_ += size_info_.state_dimension;
 }
-TfLiteStatus EvalInteger8x8_16Lstm(
-    const TfLiteEvalTensor* input,
-    const TfLiteEvalTensor* input_to_input_weights,
-    const TfLiteEvalTensor* input_to_forget_weights,
-    const TfLiteEvalTensor* input_to_cell_weights,
-    const TfLiteEvalTensor* input_to_output_weights,
-    const TfLiteEvalTensor* recurrent_to_input_weights,
-    const TfLiteEvalTensor* recurrent_to_forget_weights,
-    const TfLiteEvalTensor* recurrent_to_cell_weights,
-    const TfLiteEvalTensor* recurrent_to_output_weights,
-    const TfLiteEvalTensor* cell_to_input_weights,
-    const TfLiteEvalTensor* cell_to_forget_weights,
-    const TfLiteEvalTensor* cell_to_output_weights,
-    const TfLiteEvalTensor* input_layer_norm_coefficients,
-    const TfLiteEvalTensor* forget_layer_norm_coefficients,
-    const TfLiteEvalTensor* cell_layer_norm_coefficients,
-    const TfLiteEvalTensor* output_layer_norm_coefficients,
-    const TfLiteEvalTensor* input_gate_bias,
-    const TfLiteEvalTensor* forget_gate_bias,
-    const TfLiteEvalTensor* cell_gate_bias,
-    const TfLiteEvalTensor* output_gate_bias,
-    const TfLiteEvalTensor* projection_weights,
-    const TfLiteEvalTensor* projection_bias, const TfLiteLSTMParams* params,
-    bool forward_sequence, bool time_major,
-    const IntegerLstmParameter* integer_lstm_param, int32_t output_state_zp,
-    TfLiteEvalTensor* output_state, TfLiteEvalTensor* cell_state,
-    TfLiteEvalTensor* output, int16_t* scratch0, int16_t* scratch1,
-    int16_t* scratch2, int16_t* scratch3, int8_t* scratch4, int32_t* scratch5) {
-  TFLITE_DCHECK(input->dims->size >= 2 && input->dims->size <= 3);
-  const int n_input = input->dims->data[input->dims->size - 1];
-  int max_time, n_batch;
-  if (input->dims->size == 2) {
-    max_time = 1;
-    n_batch = input->dims->data[0];
-  } else {
-    max_time = (time_major) ? input->dims->data[0] : input->dims->data[1];
-    n_batch = (time_major) ? input->dims->data[1] : input->dims->data[0];
+// Input shape for each single time LSTM invocation.
+// Multi-batch for time_major input
+RuntimeShape LstmStepManager::InputShape() const {
+  int batch_size = 1;
+  if (size_info_.time_major) {
+    batch_size = size_info_.batch_size;
  }
+  const int dims[2] = {batch_size, size_info_.input_dimension};
+  const int32_t* dims_data = reinterpret_cast<const int32_t*>(dims);
+  return RuntimeShape(2, dims_data);
+}
-  // n_cell and n_output will be the same size when there is no projection.
-  const int n_cell = input_to_output_weights->dims->data[0];
-  const int n_output = recurrent_to_output_weights->dims->data[1];
-
-  // Get params for time/batch/sequence.
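A worked example of the offset rules in UpdateTime() and UpdateBatch() above, using hypothetical sizes (batch_size = 2, time_steps = 3, input_dimension = 4, state_dimension = 5):

// time_major == true: every UpdateTime() advances input_offset_ by
// 4 * 2 = 8 and output_offset_ by 5 * 2 = 10, because all batches are
// processed together at each time step; UpdateBatch() leaves the
// hidden/cell state offsets untouched.
// time_major == false: every UpdateTime() advances input_offset_ by 4 and
// output_offset_ by 5; after the 3 time steps of one batch, UpdateBatch()
// advances hidden_state_offset_ and cell_state_offset_ by 5, and
// ResetTime() restarts the time scan for the next batch.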
- const int output_batch_leading_dim = - output->dims->data[output->dims->size - 1]; - - if (time_major) { - const int input_step = n_batch * n_input; - const int output_step = n_batch * output_batch_leading_dim; - for (int t = 0; t < max_time; t++) { - const int t_rel = t; - int8_t* output_ptr = - tflite::micro::GetTensorData(output) + t_rel * output_step; - const int8_t* input_ptr = - tflite::micro::GetTensorData(input) + t_rel * input_step; - lstm_internal::LstmStepInteger8x8_16( - input_ptr, - input_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_input_weights), - integer_lstm_param->effective_input_to_input_scale_a, - integer_lstm_param->effective_input_to_input_scale_b, - input_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_forget_weights), - integer_lstm_param->effective_input_to_forget_scale_a, - integer_lstm_param->effective_input_to_forget_scale_b, - input_to_cell_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_cell_weights), - integer_lstm_param->effective_input_to_cell_scale_a, - integer_lstm_param->effective_input_to_cell_scale_b, - input_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_output_weights), - integer_lstm_param->effective_input_to_output_scale_a, - integer_lstm_param->effective_input_to_output_scale_b, - recurrent_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_input_weights), - integer_lstm_param->effective_recurrent_to_input_scale_a, - integer_lstm_param->effective_recurrent_to_input_scale_b, - recurrent_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_forget_weights), - integer_lstm_param->effective_recurrent_to_forget_scale_a, - integer_lstm_param->effective_recurrent_to_forget_scale_b, - recurrent_to_cell_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(recurrent_to_cell_weights), - integer_lstm_param->effective_recurrent_to_cell_scale_a, - integer_lstm_param->effective_recurrent_to_cell_scale_b, - recurrent_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_output_weights), - integer_lstm_param->effective_recurrent_to_output_scale_a, - integer_lstm_param->effective_recurrent_to_output_scale_b, - cell_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_to_input_weights), - integer_lstm_param->effective_cell_to_input_scale_a, - integer_lstm_param->effective_cell_to_input_scale_b, - cell_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_to_forget_weights), - integer_lstm_param->effective_cell_to_forget_scale_a, - integer_lstm_param->effective_cell_to_forget_scale_b, - cell_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_to_output_weights), - integer_lstm_param->effective_cell_to_output_scale_a, - integer_lstm_param->effective_cell_to_output_scale_b, - projection_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(projection_weights), - integer_lstm_param->effective_proj_scale_a, - integer_lstm_param->effective_proj_scale_b, - integer_lstm_param->hidden_zp, - integer_lstm_param->effective_hidden_scale_a, - integer_lstm_param->effective_hidden_scale_b, - input_layer_norm_coefficients == nullptr - ? 
nullptr - : tflite::micro::GetTensorData( - input_layer_norm_coefficients), - integer_lstm_param->layer_norm_input_scale_a, - integer_lstm_param->layer_norm_input_scale_b, - forget_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - forget_layer_norm_coefficients), - integer_lstm_param->layer_norm_forget_scale_a, - integer_lstm_param->layer_norm_forget_scale_b, - cell_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - cell_layer_norm_coefficients), - integer_lstm_param->layer_norm_cell_scale_a, - integer_lstm_param->layer_norm_cell_scale_b, - output_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - output_layer_norm_coefficients), - integer_lstm_param->layer_norm_output_scale_a, - integer_lstm_param->layer_norm_output_scale_b, - input_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_gate_bias), - forget_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(forget_gate_bias), - cell_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_gate_bias), - output_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(output_gate_bias), - integer_lstm_param->quantized_cell_clip, - integer_lstm_param->quantized_proj_clip, - integer_lstm_param->cell_scale, - integer_lstm_param->input_variance_guard, - integer_lstm_param->forget_variance_guard, - integer_lstm_param->cell_variance_guard, - integer_lstm_param->output_variance_guard, - integer_lstm_param->input_to_forget_effective_bias, - integer_lstm_param->recurrent_to_forget_effective_bias, - integer_lstm_param->input_to_cell_effective_bias, - integer_lstm_param->recurrent_to_cell_effective_bias, - integer_lstm_param->input_to_output_effective_bias, - integer_lstm_param->recurrent_to_output_effective_bias, - integer_lstm_param->input_to_input_effective_bias, - integer_lstm_param->recurrent_to_input_effective_bias, - integer_lstm_param->projection_effective_bias, n_batch, n_cell, - n_input, n_output, tflite::micro::GetTensorData(output_state), - output_state_zp, tflite::micro::GetTensorData(cell_state), - output_ptr, scratch0, scratch1, scratch2, scratch3, scratch4, - scratch5); - } - } else { - for (int b = 0; b < n_batch; b++) { - const int input_step = n_input; - const int output_step = output_batch_leading_dim; - for (int t = 0; t < max_time; t++) { - // If this is the forward_sequence, step forward, otherwise step - // backwards. - const int t_rel = forward_sequence ? t : max_time - t - 1; - const int time_offset = b * max_time + t_rel; - const int8_t* input_ptr = tflite::micro::GetTensorData(input) + - time_offset * input_step; - int8_t* output_ptr = tflite::micro::GetTensorData(output) + - time_offset * output_step; - - // Offset the {output,cell}_state pointers to the right batch. - int8_t* output_state_ptr = - tflite::micro::GetTensorData(output_state) + - b * output_batch_leading_dim; - int16_t* cell_state_ptr = - tflite::micro::GetTensorData(cell_state) + b * n_cell; - - lstm_internal::LstmStepInteger8x8_16( - input_ptr, - input_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_input_weights), - integer_lstm_param->effective_input_to_input_scale_a, - integer_lstm_param->effective_input_to_input_scale_b, - input_to_forget_weights == nullptr - ? 
nullptr - : tflite::micro::GetTensorData(input_to_forget_weights), - integer_lstm_param->effective_input_to_forget_scale_a, - integer_lstm_param->effective_input_to_forget_scale_b, - input_to_cell_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_cell_weights), - integer_lstm_param->effective_input_to_cell_scale_a, - integer_lstm_param->effective_input_to_cell_scale_b, - input_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_output_weights), - integer_lstm_param->effective_input_to_output_scale_a, - integer_lstm_param->effective_input_to_output_scale_b, - recurrent_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_input_weights), - integer_lstm_param->effective_recurrent_to_input_scale_a, - integer_lstm_param->effective_recurrent_to_input_scale_b, - recurrent_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_forget_weights), - integer_lstm_param->effective_recurrent_to_forget_scale_a, - integer_lstm_param->effective_recurrent_to_forget_scale_b, - recurrent_to_cell_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_cell_weights), - integer_lstm_param->effective_recurrent_to_cell_scale_a, - integer_lstm_param->effective_recurrent_to_cell_scale_b, - recurrent_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_output_weights), - integer_lstm_param->effective_recurrent_to_output_scale_a, - integer_lstm_param->effective_recurrent_to_output_scale_b, - cell_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_to_input_weights), - integer_lstm_param->effective_cell_to_input_scale_a, - integer_lstm_param->effective_cell_to_input_scale_b, - cell_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_to_forget_weights), - integer_lstm_param->effective_cell_to_forget_scale_a, - integer_lstm_param->effective_cell_to_forget_scale_b, - cell_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_to_output_weights), - integer_lstm_param->effective_cell_to_output_scale_a, - integer_lstm_param->effective_cell_to_output_scale_b, - projection_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(projection_weights), - integer_lstm_param->effective_proj_scale_a, - integer_lstm_param->effective_proj_scale_b, - integer_lstm_param->hidden_zp, - integer_lstm_param->effective_hidden_scale_a, - integer_lstm_param->effective_hidden_scale_b, - input_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - input_layer_norm_coefficients), - integer_lstm_param->layer_norm_input_scale_a, - integer_lstm_param->layer_norm_input_scale_b, - forget_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - forget_layer_norm_coefficients), - integer_lstm_param->layer_norm_forget_scale_a, - integer_lstm_param->layer_norm_forget_scale_b, - cell_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - cell_layer_norm_coefficients), - integer_lstm_param->layer_norm_cell_scale_a, - integer_lstm_param->layer_norm_cell_scale_b, - output_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - output_layer_norm_coefficients), - integer_lstm_param->layer_norm_output_scale_a, - integer_lstm_param->layer_norm_output_scale_b, - input_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_gate_bias), - forget_gate_bias == nullptr - ? 
nullptr
-              : tflite::micro::GetTensorData(forget_gate_bias),
-          cell_gate_bias == nullptr
-              ? nullptr
-              : tflite::micro::GetTensorData(cell_gate_bias),
-          output_gate_bias == nullptr
-              ? nullptr
-              : tflite::micro::GetTensorData(output_gate_bias),
-          integer_lstm_param->quantized_cell_clip,
-          integer_lstm_param->quantized_proj_clip,
-          integer_lstm_param->cell_scale,
-          integer_lstm_param->input_variance_guard,
-          integer_lstm_param->forget_variance_guard,
-          integer_lstm_param->cell_variance_guard,
-          integer_lstm_param->output_variance_guard,
-          integer_lstm_param->input_to_forget_effective_bias,
-          integer_lstm_param->recurrent_to_forget_effective_bias,
-          integer_lstm_param->input_to_cell_effective_bias,
-          integer_lstm_param->recurrent_to_cell_effective_bias,
-          integer_lstm_param->input_to_output_effective_bias,
-          integer_lstm_param->recurrent_to_output_effective_bias,
-          integer_lstm_param->input_to_input_effective_bias,
-          integer_lstm_param->recurrent_to_input_effective_bias,
-          integer_lstm_param->projection_effective_bias, /*n_batch=*/1,
-          n_cell, n_input, n_output, output_state_ptr, output_state_zp,
-          cell_state_ptr, output_ptr, scratch0, scratch1, scratch2, scratch3,
-          scratch4, scratch5);
-    }
-  }
+// State shape (both hidden and cell) for each single time LSTM invocation.
+// Multi-batch for time_major input
+RuntimeShape LstmStepManager::StateShape() const {
+  int batch_size = 1;
+  if (size_info_.time_major) {
+    batch_size = size_info_.batch_size;
  }
-
-  return kTfLiteOk;
+  const int dims[2] = {batch_size, size_info_.state_dimension};
+  const int32_t* dims_data = reinterpret_cast<const int32_t*>(dims);
+  return RuntimeShape(2, dims_data);
 }
-} // namespace tflite
\ No newline at end of file
+} // namespace lstm_internal
+} // namespace tflite
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval.h
index 7794adb5a..ebede610f 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval.h
+++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval.h
@@ -1,4 +1,4 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -12,304 +12,406 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_H_
-#define TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_H_
+// Functions to perform integer evaluation for standard LSTM (e.g., as defined
+// in the Keras LSTM layer; no peephole etc.). Currently used only by the
+// 16-bit activation case.
+
+#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_GENERAL_H_
+#define TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_GENERAL_H_
+#include
 #include
-#include
 #include "tensorflow/lite/c/builtin_op_data.h"
 #include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/lstm_shared.h"
+#include "tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 // Since LSTM includes multiple intermediate stages, introducing the internal
 // namespace to expose them for testing
 namespace lstm_internal {
-void CalculateLstmGateFloat(
-    const float* input, const float* input_to_gate_weights,
-    const float* aux_input, const float* aux_input_to_gate_weights,
-    const float* output_state, const float* recurrent_to_gate_weights,
-    const float* cell_state, const float* cell_to_gate_weights,
-    const float* layer_norm_coefficients, const float* gate_bias,
-    const int n_batch, const int n_input, const int n_aux_input,
-    const int n_output, const int n_cell,
-    const TfLiteFusedActivation activation, float* gate,
-    const bool is_input_all_zeros, const bool is_aux_input_all_zeros);
-
-void UpdateLstmCellFloat(int n_batch, int n_cell, float* cell_state,
-                         const float* input_gate, float* forget_gate,
-                         const float* cell_gate, bool use_cifg, float clip);
-
-void CalculateLstmOutputFloat(int n_batch, int n_cell, int n_output,
-                              const float* cell_state, const float* output_gate,
-                              TfLiteFusedActivation activation,
-                              const float* projection_weights,
-                              const float* projection_bias,
-                              const float proj_clip, float* output_state,
-                              float* scratch);
-
-void CalculateLstmGateInteger8x8_16(
-    // Input and weights
-    const int8_t* input, const int8_t* input_to_gate_weights,
-    const int32_t* input_to_gate_bias, const int32_t input_to_gate_scale_a,
-    const int32_t input_to_gate_scale_b,
-    // Output state and weights
-    const int8_t* output_state, const int8_t* recurrent_to_gate_weights,
-    const int32_t* recurrent_to_gate_bias,
-    const int32_t recurrent_to_gate_scale_a,
-    const int32_t recurrent_to_gate_scale_b,
-    // Cell state and weights
-    const int16_t* cell_state, const int16_t* cell_to_gate_weights,
-    const int32_t cell_to_gate_scale_a, const int32_t cell_to_gate_scale_b,
-    // Layer normalization parameters (layer norm LSTM)
-    const int16_t* layer_norm_coefficients, const int32_t* layer_norm_bias,
-    const int32_t layer_norm_input_scale_a,
-    const int32_t layer_norm_input_scale_b,
-    const int32_t layer_norm_variance_guard,
-    // Array sizes
-    const int n_batch, const int n_input, const int n_output, const int n_cell,
-    const TfLiteFusedActivation activation,
+
+void Sigmoid(const RuntimeShape& data_shape, int16_t* data);
+
+void Sigmoid(const RuntimeShape& data_shape, float* data);
+
+void Tanh(int32_t cell_state_scale_power, const RuntimeShape& input_data_shape,
+          int16_t* input_data, const RuntimeShape& output_data_shape,
+          int16_t* output_data);
+
+void Tanh(int32_t cell_state_scale_power, const RuntimeShape& input_data_shape,
+          float* input_data, const RuntimeShape& output_data_shape,
+          float* output_data);
+
+void Mul(const RuntimeShape& shape, const ArithmeticParams& params,
+         const int16_t* input1_data, const int16_t* input2_data,
+         int8_t* output_data);
+
+void Mul(const RuntimeShape& shape, const ArithmeticParams& params,
+         const int16_t* input1_data, const int16_t* input2_data,
+         int16_t* output_data);
+
+void Mul(const RuntimeShape& shape, const ArithmeticParams& params,
+         const float* input1_data, const float* input2_data,
+         float* output_data);
+
+void FullyConnected(const FullyConnectedParams& params,
+                    const RuntimeShape& input_shape, const int8_t* input_data,
+                    const RuntimeShape& filter_shape, const int8_t* filter_data,
+                    const RuntimeShape& bias_shape, const int32_t* bias_data,
+                    const RuntimeShape& output_shape, int16_t* output_data);
+
+void FullyConnected(const FullyConnectedParams& params,
+                    const RuntimeShape& input_shape, const int16_t* input_data,
+                    const RuntimeShape& filter_shape, const int8_t* filter_data,
+                    const RuntimeShape& bias_shape, const int64_t* bias_data,
+                    const RuntimeShape& output_shape, int16_t* output_data);
+
+void FullyConnected(const FullyConnectedParams& params,
+                    const RuntimeShape& input_shape, const float* input_data,
+                    const RuntimeShape& filter_shape, const float* filter_data,
+                    const RuntimeShape& bias_shape, const float* bias_data,
+                    const RuntimeShape& output_shape, float* output_data);
+
+void AddElementWise(const int16_t* input_1, const int16_t* input_2, int n_batch,
+                    int n_input, int16_t* output);
+
+void AddElementWise(const float* input_1, const float* input_2, int n_batch,
+                    int n_input, float* output);
+
+void Clipping(const int v_size, const CellStateInfo& cell_state_info,
+              int16_t* vector);
+
+void Clipping(const int v_size, const CellStateInfo& cell_state_info,
+              float* vector);
+
+// Manages the slice position (offset), slice length (sliced tensor shape),
+// and update rules for input/output/hidden state/cell state tensors at each
+// time step.
+class LstmStepManager {
+ public:
+  LstmStepManager() = delete;
+  // Does not take any ownership, and all pointers must refer to valid objects
+  // that outlive the one constructed.
+  explicit LstmStepManager(const LstmSizeInfo* size_info)
+      : size_info_(*size_info) {}
+
+  void UpdateTime();
+  void UpdateBatch();
+
+  void ResetTime() { current_time_ = 0; }
+  RuntimeShape InputShape() const;
+  RuntimeShape StateShape() const;
+
+  int InputOffset() const { return input_offset_; }
+  int OutputOffset() const { return output_offset_; }
+  int HiddenStateOffset() const { return hidden_state_offset_; }
+  int CellStateOffset() const { return cell_state_offset_; }
+
+ private:
+  int current_time_ = 0;
+  int current_batch_ = 0;
+  int input_offset_ = 0;
+  int output_offset_ = 0;
+  int hidden_state_offset_ = 0;
+  int cell_state_offset_ = 0;
+  // size_info_ comes from LstmOpData, which resides in the memory arena
+  // (guaranteed to outlast LstmStepManager, which resides on the stack).
+  const LstmSizeInfo& size_info_;
+};
+
+// Calculates a single LSTM gate.
+// Implements the following formula: +// gate = activate(FC(input) + FC(recurrent)) +// Activation is sigmoid except for the "cell" gate (configurable, usually tanh) +template +void CalculateLstmGate( + const LstmStepManager& step_info, const GateParameters& gate_params, + // Input FC + const TfLiteEvalTensor* input, const TfLiteEvalTensor* input_weight, + const TfLiteEvalTensor* input_bias, + // Recurrent FC + const TfLiteEvalTensor* recurrent, const TfLiteEvalTensor* recurrent_weight, + const TfLiteEvalTensor* recurrent_bias, // Output - int16_t* gate, - // Parameters for performance optimizations + CellType* gate_output, // Scratch arrays - int32_t* scratch5); - -void UpdateLstmCellInteger(int n_batch, int n_cell, int16_t* cell_state, - int32_t cell_state_scale, const int16_t* input_gate, - int16_t* forget_gate, const int16_t* cell_gate, - bool use_cifg, int16_t clip); - -void CalculateLstmOutputInteger8x8_16( - int n_batch, int n_cell, int n_output, int16_t* cell_state, - int32_t cell_state_scale, const int16_t* output_gate, - int32_t hidden_scale_a, int32_t hidden_scale_b, int32_t hidden_zp, - const int8_t* projection_weights, int32_t proj_scale_a, - int32_t proj_scale_b, const int32_t* projection_bias, - int32_t output_state_zp, int8_t quantized_proj_clip, int8_t* output_state, - int16_t* scratch0, int8_t* scratch1, int32_t* scratch2); - -void LstmStepFloat( - const float* input_ptr, const float* input_to_input_weights_ptr, - const float* input_to_forget_weights_ptr, - const float* input_to_cell_weights_ptr, - const float* input_to_output_weights_ptr, const float* aux_input_ptr, - const float* aux_input_to_input_weights_ptr, - const float* aux_input_to_forget_weights_ptr, - const float* aux_input_to_cell_weights_ptr, - const float* aux_input_to_output_weights_ptr, - const float* recurrent_to_input_weights_ptr, - const float* recurrent_to_forget_weights_ptr, - const float* recurrent_to_cell_weights_ptr, - const float* recurrent_to_output_weights_ptr, - const float* cell_to_input_weights_ptr, - const float* cell_to_forget_weights_ptr, - const float* cell_to_output_weights_ptr, - const float* input_layer_norm_coefficients_ptr, - const float* forget_layer_norm_coefficients_ptr, - const float* cell_layer_norm_coefficients_ptr, - const float* output_layer_norm_coefficients_ptr, - const float* input_gate_bias_ptr, const float* forget_gate_bias_ptr, - const float* cell_gate_bias_ptr, const float* output_gate_bias_ptr, - const float* projection_weights_ptr, const float* projection_bias_ptr, - const TfLiteLSTMParams* params, int n_batch, int n_cell, int n_input, - int n_aux_input, int n_output, int output_batch_leading_dim, - float* output_state_ptr, float* cell_state_ptr, float* scratch0, - float* scratch1, float* scratch2, float* scratch3, float* output_ptr); - -void LstmStepInteger8x8_16( - const int8_t* input_ptr, const int8_t* input_to_input_weight_ptr, - int32_t effective_input_to_input_scale_a, - int32_t effective_input_to_input_scale_b, - const int8_t* input_to_forget_weight_ptr, - int32_t effective_input_to_forget_scale_a, - int32_t effective_input_to_forget_scale_b, - const int8_t* input_to_cell_weight_ptr, - int32_t effective_input_to_cell_scale_a, - int32_t effective_input_to_cell_scale_b, - const int8_t* input_to_output_weight_ptr, - int32_t effective_input_to_output_scale_a, - int32_t effective_input_to_output_scale_b, - const int8_t* recurrent_to_input_weight_ptr, - int32_t effective_recurrent_to_input_scale_a, - int32_t effective_recurrent_to_input_scale_b, - const int8_t* 
recurrent_to_forget_weight_ptr, - int32_t effective_recurrent_to_forget_scale_a, - int32_t effective_recurrent_to_forget_scale_b, - const int8_t* recurrent_to_cell_weight_ptr, - int32_t effective_recurrent_to_cell_scale_a, - int32_t effective_recurrent_to_cell_scale_b, - const int8_t* recurrent_to_output_weight_ptr, - int32_t effective_recurrent_to_output_scale_a, - int32_t effective_recurrent_to_output_scale_b, - const int16_t* cell_to_input_weight_ptr, - int32_t effective_cell_to_input_scale_a, - int32_t effective_cell_to_input_scale_b, - const int16_t* cell_to_forget_weight_ptr, - int32_t effective_cell_to_forget_scale_a, - int32_t effective_cell_to_forget_scale_b, - const int16_t* cell_to_output_weight_ptr, - int32_t effective_cell_to_output_scale_a, - int32_t effective_cell_to_output_scale_b, - const int8_t* projection_weight_ptr, int32_t effective_proj_scale_a, - int32_t effective_proj_scale_b, int32_t hidden_zp, - int32_t effective_hidden_scale_a, int32_t effective_hidden_scale_b, - const int16_t* layer_norm_input_weight_ptr, - int32_t layer_norm_input_scale_a, int32_t layer_norm_input_scale_b, - const int16_t* layer_norm_forget_weight_ptr, - int32_t layer_norm_forget_scale_a, int32_t layer_norm_forget_scale_b, - const int16_t* layer_norm_cell_weight_ptr, int32_t layer_norm_cell_scale_a, - int32_t layer_norm_cell_scale_b, - const int16_t* layer_norm_output_weight_ptr, - int32_t layer_norm_output_scale_a, int32_t layer_norm_output_scale_b, - const int32_t* input_gate_bias_ptr, const int32_t* forget_gate_bias_ptr, - const int32_t* cell_gate_bias_ptr, const int32_t* output_gate_bias_ptr, - int16_t quantized_cell_clip, int8_t quantized_proj_clip, - int32_t cell_state_scale, int32_t input_variance_guard, - int32_t forget_variance_guard, int32_t cell_variance_guard, - int32_t output_variance_guard, - const int32_t* input_to_forget_effective_bias, - const int32_t* recurrent_to_forget_effective_bias, - const int32_t* input_to_cell_effective_bias, - const int32_t* recurrent_to_cell_effective_bias, - const int32_t* input_to_output_effective_bias, - const int32_t* recurrent_to_output_effective_bias, - const int32_t* input_to_input_effective_bias, - const int32_t* recurrent_to_input_effective_bias, - const int32_t* projection_effective_bias, int n_batch, int n_cell, - int n_input, int n_output, int8_t* output_state_ptr, - int32_t output_state_zp, int16_t* cell_state_ptr, int8_t* output_ptr, - int16_t* scratch0, int16_t* scratch1, int16_t* scratch2, int16_t* scratch3, - int8_t* scratch4, int32_t* scratch5); -} // namespace lstm_internal + CellType* fc_output_buffer, const TfLiteFusedActivation activation) { + const auto gate_output_shape = step_info.StateShape(); + // Check offset validity to avoid memory overflow + TFLITE_DCHECK_LE(step_info.InputOffset() + step_info.InputShape().FlatSize(), + tflite::micro::GetTensorShape(input).FlatSize()); + TFLITE_DCHECK_LE( + step_info.HiddenStateOffset() + step_info.StateShape().FlatSize(), + tflite::micro::GetTensorShape(recurrent).FlatSize()); -// Pamameters for integer LSTM. -// Consider split this into two Integer Parameters if more fields are added. 
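To ground the gate formula above in something executable, here is a self-contained float sketch of one gate and the state updates it feeds; all names and sizes are illustrative only, and layer norm, peephole, and projection are omitted, matching this kernel's scope:

#include <cmath>

namespace {
constexpr int kInputSize = 2;  // hypothetical n_input
constexpr int kStateSize = 2;  // hypothetical n_state (no projection)

float SigmoidF(float x) { return 1.0f / (1.0f + std::exp(-x)); }

// gate = activate(FC(input) + FC(recurrent)); the bias is applied only in
// the input FC, mirroring the nullptr recurrent_bias passed by this kernel.
void CalculateToyGate(const float x[kInputSize], const float h[kStateSize],
                      const float wx[kStateSize][kInputSize],
                      const float wh[kStateSize][kStateSize],
                      const float b[kStateSize], bool use_tanh,
                      float gate[kStateSize]) {
  for (int j = 0; j < kStateSize; ++j) {
    float acc = b[j];
    for (int i = 0; i < kInputSize; ++i) acc += x[i] * wx[j][i];
    for (int k = 0; k < kStateSize; ++k) acc += h[k] * wh[j][k];
    gate[j] = use_tanh ? std::tanh(acc) : SigmoidF(acc);
  }
}
}  // namespace

// With the forget/input/cell/output gates f, i, g, o computed this way, one
// step then performs c = f * c + i * g (UpdateLstmCell, plus optional
// clipping) and h = o * tanh(c) (UpdateLstmHidden), all element-wise.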
-struct IntegerLstmParameter { - int32_t effective_input_to_input_scale_a = 0; - int32_t effective_input_to_input_scale_b = 0; - int32_t effective_recurrent_to_input_scale_a = 0; - int32_t effective_recurrent_to_input_scale_b = 0; - int32_t effective_cell_to_input_scale_a = 0; - int32_t effective_cell_to_input_scale_b = 0; - int32_t effective_input_to_forget_scale_a = 0; - int32_t effective_input_to_forget_scale_b = 0; - int32_t effective_recurrent_to_forget_scale_a = 0; - int32_t effective_recurrent_to_forget_scale_b = 0; - int32_t effective_cell_to_forget_scale_a = 0; - int32_t effective_cell_to_forget_scale_b = 0; - int32_t effective_input_to_cell_scale_a = 0; - int32_t effective_input_to_cell_scale_b = 0; - int32_t effective_recurrent_to_cell_scale_a = 0; - int32_t effective_recurrent_to_cell_scale_b = 0; - int32_t effective_input_to_output_scale_a = 0; - int32_t effective_input_to_output_scale_b = 0; - int32_t effective_recurrent_to_output_scale_a = 0; - int32_t effective_recurrent_to_output_scale_b = 0; - int32_t effective_cell_to_output_scale_a = 0; - int32_t effective_cell_to_output_scale_b = 0; - int32_t effective_proj_scale_a = 0; - int32_t effective_proj_scale_b = 0; - int32_t effective_hidden_scale_a = 0; - int32_t effective_hidden_scale_b = 0; - int32_t layer_norm_input_scale_a = 0; - int32_t layer_norm_input_scale_b = 0; - int32_t layer_norm_forget_scale_a = 0; - int32_t layer_norm_forget_scale_b = 0; - int32_t layer_norm_cell_scale_a = 0; - int32_t layer_norm_cell_scale_b = 0; - int32_t layer_norm_output_scale_a = 0; - int32_t layer_norm_output_scale_b = 0; - // Quantized clip value for cell and projection. Zero value means no - // clipping. - int16_t quantized_cell_clip = 0; - int8_t quantized_proj_clip = 0; - int32_t hidden_zp = 0; - int32_t cell_scale = 0; - - int32_t input_variance_guard = 0; - int32_t forget_variance_guard = 0; - int32_t cell_variance_guard = 0; - int32_t output_variance_guard = 0; - - // Pre-calculate bias + zero_point * weight. - int32_t* input_to_forget_effective_bias = nullptr; - int32_t* recurrent_to_forget_effective_bias = nullptr; - int32_t* input_to_cell_effective_bias = nullptr; - int32_t* recurrent_to_cell_effective_bias = nullptr; - int32_t* input_to_output_effective_bias = nullptr; - int32_t* recurrent_to_output_effective_bias = nullptr; - int32_t* input_to_input_effective_bias = nullptr; - int32_t* recurrent_to_input_effective_bias = nullptr; - int32_t* projection_effective_bias = nullptr; - - // Scale and zero point for intermediate tensors. - // Used only in the 8x8_8 case. 
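The "pre-calculate bias + zero_point * weight" fields above exploit that, for a quantized activation x with zero point zp, sum_i (x_i - zp) * W[j][i] + b[j] = sum_i x_i * W[j][i] + (b[j] - zp * sum_i W[j][i]); the parenthesized term is input-independent and can be computed once at initialization. A hedged sketch with hypothetical names (the sign of the folded term depends on whether the zero point is subtracted from or added to the raw quantized values):

#include <cstdint>

// effective_bias[j] = bias[j] - input_zero_point * (sum of weight row j)
void PrecomputeEffectiveBias(const int8_t* weights, const int32_t* bias,
                             int32_t input_zero_point, int rows, int cols,
                             int32_t* effective_bias) {
  for (int j = 0; j < rows; ++j) {
    int32_t row_sum = 0;
    for (int i = 0; i < cols; ++i) row_sum += weights[j * cols + i];
    effective_bias[j] = bias[j] - input_zero_point * row_sum;
  }
}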
-  int32_t intermediate_scale_a[8] = {};
-  int32_t intermediate_scale_b[8] = {};
-  int32_t intermediate_zp[12] = {};
-};
+  // Input FC
+  FullyConnected(gate_params.input_fc_params, step_info.InputShape(),
+                 tflite::micro::GetTensorData(input) +
+                     step_info.InputOffset(),
+                 micro::GetTensorShape(input_weight),
+                 tflite::micro::GetTensorData(input_weight),
+                 tflite::micro::GetTensorShape(input_bias),
+                 tflite::micro::GetOptionalTensorData(input_bias),
+                 gate_output_shape, gate_output);
+
+  // Recurrent FC
+  FullyConnected(gate_params.recurrent_fc_params, step_info.StateShape(),
+                 tflite::micro::GetTensorData(recurrent) +
+                     step_info.HiddenStateOffset(),
+                 tflite::micro::GetTensorShape(recurrent_weight),
+                 tflite::micro::GetTensorData(recurrent_weight),
+                 tflite::micro::GetTensorShape(recurrent_bias),
+                 tflite::micro::GetOptionalTensorData(recurrent_bias),
+                 gate_output_shape, fc_output_buffer);
+
+  AddElementWise(gate_output, fc_output_buffer,
+                 /*n_batch=*/gate_output_shape.DimsData()[0],
+                 /*n_state=*/gate_output_shape.DimsData()[1], gate_output);
+  // Apply activation
+  switch (activation) {
+    case kTfLiteActSigmoid:
+      Sigmoid(gate_output_shape, gate_output);
+      break;
+    case kTfLiteActTanh: {
+      // Set the scale power to -12 to avoid shift
+      Tanh(/*cell_state_scale_power=*/-12, gate_output_shape, gate_output,
+           gate_output_shape, gate_output);
+    } break;
+    default:
+      // Only Sigmoid or Tanh is used.
+      TFLITE_ASSERT_FALSE;
+  }
+}
+
+// Update the cell state using the output from the forget gate, input gate, and
+// cell gate. Formula: updated_cell_state = forget_gate_output * cell_state +
+// input_gate_output * cell_gate_output, where * denotes element-wise
+// multiplication.
+template <typename CellType>
+void UpdateLstmCell(const LstmStepManager& step_info,
+                    TfLiteEvalTensor* cell_state,
+                    // Gate outputs
+                    CellType* forget_gate_output,
+                    const CellType* input_gate_output,
+                    const CellType* cell_gate_output,
+                    // Mul parameters
+                    const ArithmeticParams& forget_cell_mul_params,
+                    const ArithmeticParams& input_mul_params,
+                    const CellStateInfo& cell_state_info, CellType* buffer) {
+  // Check offset validity to avoid memory overflow
+  TFLITE_DCHECK_LE(
+      step_info.CellStateOffset() + step_info.StateShape().FlatSize(),
+      tflite::micro::GetTensorShape(cell_state).FlatSize());
+
+  auto cell_state_shape = step_info.StateShape();
+  // Forget Gate x Cell State
+  Mul(cell_state_shape, forget_cell_mul_params, forget_gate_output,
+      tflite::micro::GetTensorData<CellType>(cell_state) +
+          step_info.CellStateOffset(),
+      tflite::micro::GetTensorData<CellType>(cell_state) +
+          step_info.CellStateOffset());
+  // Input Gate x Cell Gate
+  Mul(cell_state_shape, input_mul_params, input_gate_output, cell_gate_output,
+      buffer);
+
+  // Update the cell state
+  AddElementWise(tflite::micro::GetTensorData<CellType>(cell_state) +
+                     step_info.CellStateOffset(),
+                 buffer,
+                 /*n_batch=*/cell_state_shape.DimsData()[0],
+                 /*n_state=*/cell_state_shape.DimsData()[1],
+                 tflite::micro::GetTensorData<CellType>(cell_state) +
+                     step_info.CellStateOffset());
+
+  if (cell_state_info.cell_clip > 0) {
+    Clipping(cell_state_shape.FlatSize(), cell_state_info,
+             tflite::micro::GetTensorData<CellType>(cell_state) +
+                 step_info.CellStateOffset());
+  }
+}
+
+// Update the hidden state of the LSTM kernel using the following formula:
+// updated_hidden_state = Tanh(updated_cell_state) * output_gate_output, where
+// * means element-wise multiplication.
+template <typename CellType>
+void UpdateLstmHidden(const LstmStepManager& step_info,
+                      TfLiteEvalTensor* cell_state,
+                      TfLiteEvalTensor* hidden_state,
+                      const CellType*
output_gate_output, + const ArithmeticParams& mul_params, + int32_t cell_state_scale_power, CellType* buffer) { + // Check offset validity to avoid memory overflow + TFLITE_DCHECK_LE( + step_info.CellStateOffset() + step_info.StateShape().FlatSize(), + tflite::micro::GetTensorShape(cell_state).FlatSize()); + TFLITE_DCHECK_LE( + step_info.HiddenStateOffset() + step_info.StateShape().FlatSize(), + tflite::micro::GetTensorShape(hidden_state).FlatSize()); + + auto cell_state_shape = step_info.StateShape(); + CellType* cell_state_data = + tflite::micro::GetTensorData(cell_state) + + step_info.CellStateOffset(); + // Tanh(cell_state) + Tanh(cell_state_scale_power, cell_state_shape, cell_state_data, + cell_state_shape, buffer); + // Update the hidden state + Mul(cell_state_shape, mul_params, buffer, output_gate_output, + tflite::micro::GetTensorData(hidden_state) + + step_info.HiddenStateOffset()); +} -TfLiteStatus EvalFloatLstm( - const TfLiteEvalTensor* input, - const TfLiteEvalTensor* input_to_input_weights, - const TfLiteEvalTensor* input_to_forget_weights, - const TfLiteEvalTensor* input_to_cell_weights, - const TfLiteEvalTensor* input_to_output_weights, - const TfLiteEvalTensor* recurrent_to_input_weights, - const TfLiteEvalTensor* recurrent_to_forget_weights, - const TfLiteEvalTensor* recurrent_to_cell_weights, - const TfLiteEvalTensor* recurrent_to_output_weights, - const TfLiteEvalTensor* cell_to_input_weights, - const TfLiteEvalTensor* cell_to_forget_weights, - const TfLiteEvalTensor* cell_to_output_weights, - const TfLiteEvalTensor* input_layer_norm_coefficients, - const TfLiteEvalTensor* forget_layer_norm_coefficients, - const TfLiteEvalTensor* cell_layer_norm_coefficients, - const TfLiteEvalTensor* output_layer_norm_coefficients, - const TfLiteEvalTensor* aux_input, - const TfLiteEvalTensor* aux_input_to_input_weights, - const TfLiteEvalTensor* aux_input_to_forget_weights, - const TfLiteEvalTensor* aux_input_to_cell_weights, - const TfLiteEvalTensor* aux_input_to_output_weights, - const TfLiteEvalTensor* input_gate_bias, - const TfLiteEvalTensor* forget_gate_bias, - const TfLiteEvalTensor* cell_gate_bias, - const TfLiteEvalTensor* output_gate_bias, - const TfLiteEvalTensor* projection_weights, - const TfLiteEvalTensor* projection_bias, const TfLiteLSTMParams* params, - bool forward_sequence, bool time_major, int output_offset, - float* scratch_buffer, TfLiteEvalTensor* output_state, - TfLiteEvalTensor* cell_state, TfLiteEvalTensor* output); - -TfLiteStatus EvalInteger8x8_16Lstm( - const TfLiteEvalTensor* input, - const TfLiteEvalTensor* input_to_input_weights, - const TfLiteEvalTensor* input_to_forget_weights, - const TfLiteEvalTensor* input_to_cell_weights, - const TfLiteEvalTensor* input_to_output_weights, - const TfLiteEvalTensor* recurrent_to_input_weights, - const TfLiteEvalTensor* recurrent_to_forget_weights, - const TfLiteEvalTensor* recurrent_to_cell_weights, - const TfLiteEvalTensor* recurrent_to_output_weights, - const TfLiteEvalTensor* cell_to_input_weights, - const TfLiteEvalTensor* cell_to_forget_weights, - const TfLiteEvalTensor* cell_to_output_weights, - const TfLiteEvalTensor* input_layer_norm_coefficients, - const TfLiteEvalTensor* forget_layer_norm_coefficients, - const TfLiteEvalTensor* cell_layer_norm_coefficients, - const TfLiteEvalTensor* output_layer_norm_coefficients, - const TfLiteEvalTensor* input_gate_bias, - const TfLiteEvalTensor* forget_gate_bias, - const TfLiteEvalTensor* cell_gate_bias, - const TfLiteEvalTensor* output_gate_bias, - const 
-    const TfLiteEvalTensor* projection_weights,
-    const TfLiteEvalTensor* projection_bias, const TfLiteLSTMParams* params,
-    bool forward_sequence, bool time_major,
-    const IntegerLstmParameter* integer_lstm_param, int32_t output_state_zp,
-    TfLiteEvalTensor* output_state, TfLiteEvalTensor* cell_state,
-    TfLiteEvalTensor* output, int16_t* scratch0, int16_t* scratch1,
-    int16_t* scratch2, int16_t* scratch3, int8_t* scratch4, int32_t* scratch5);
+template <typename ActivationType, typename WeightType, typename CellType,
+          typename BiasType>
+void LstmStep(const LstmStepManager& step_info, const OpDataLSTM& op_data,
+              LSTMKernelContents& kernel_content,
+              LSTMBuffers<CellType>& buffers) {
+  /*Step1: Calculate gate outputs to prepare cell state update*/
+  CellType* gate_internal_buffer = buffers.buffer3;
+  CellType* forget_gate_output = buffers.buffer0;
+  CalculateLstmGate<ActivationType, WeightType, CellType, BiasType>(
+      step_info, op_data.forget_gate_parameters,
+      // Input FC
+      kernel_content.GetInternalTensor(tflite::kLstmInputTensor),
+      kernel_content.GetInternalTensor(tflite::kLstmInputToForgetWeightsTensor),
+      kernel_content.GetInternalTensor(tflite::kLstmForgetGateBiasTensor),
+      // Recurrent FC
+      kernel_content.HiddenStateTensor(),
+      kernel_content.GetInternalTensor(
+          tflite::kLstmRecurrentToForgetWeightsTensor),
+      /*recurrent_bias*/ nullptr,
+      // Output
+      forget_gate_output,
+      // Scratch arrays
+      gate_internal_buffer, kTfLiteActSigmoid);
+  // Input Gate calculation
+  CellType* input_gate_output = buffers.buffer1;
+  CalculateLstmGate<ActivationType, WeightType, CellType, BiasType>(
+      step_info, op_data.input_gate_parameters,
+      // Input FC
+      kernel_content.GetInternalTensor(tflite::kLstmInputTensor),
+      kernel_content.GetInternalTensor(tflite::kLstmInputToInputWeightsTensor),
+      kernel_content.GetInternalTensor(tflite::kLstmInputGateBiasTensor),
+      // Recurrent FC
+      kernel_content.HiddenStateTensor(),
+      kernel_content.GetInternalTensor(
+          tflite::kLstmRecurrentToInputWeightsTensor),
+      /*recurrent_bias*/ nullptr,
+      // Output
+      input_gate_output,
+      // Scratch arrays
+      gate_internal_buffer, kTfLiteActSigmoid);
+
+  // Cell Gate calculation
+  CellType* cell_gate_output = buffers.buffer2;
+  CalculateLstmGate<ActivationType, WeightType, CellType, BiasType>(
+      step_info, op_data.cell_gate_parameters,
+      // Input FC
+      kernel_content.GetInternalTensor(tflite::kLstmInputTensor),
+      kernel_content.GetInternalTensor(tflite::kLstmInputToCellWeightsTensor),
+      kernel_content.GetInternalTensor(tflite::kLstmCellGateBiasTensor),
+      // Recurrent FC
+      kernel_content.HiddenStateTensor(),
+      kernel_content.GetInternalTensor(
+          tflite::kLstmRecurrentToCellWeightsTensor),
+      /*recurrent_bias*/ nullptr,
+      // Output
+      cell_gate_output,
+      // Scratch arrays
+      gate_internal_buffer, op_data.cell_gate_nonlinear_type);
+
+  /*Step2: update the cell state*/
+  const InterGateParameters& inter_gate_params = op_data.inter_gate_parameters;
+  CellType* updated_input_buffer = buffers.buffer1;  // reuse buffer
+
+  UpdateLstmCell<CellType>(step_info, kernel_content.CellStateTensor(),
+                           forget_gate_output, input_gate_output,
+                           cell_gate_output,
+                           inter_gate_params.forget_cell_mul_params,
+                           inter_gate_params.input_mul_params,
+                           op_data.cell_state_info, updated_input_buffer);
+
+  /*Step3: update the hidden state*/
+  CellType* output_gate_output = buffers.buffer1;  // reuse buffer
+  CalculateLstmGate<ActivationType, WeightType, CellType, BiasType>(
+      step_info, op_data.output_gate_parameters,
+      // Input FC
+      kernel_content.GetInternalTensor(tflite::kLstmInputTensor),
+      kernel_content.GetInternalTensor(tflite::kLstmInputToOutputWeightsTensor),
+      kernel_content.GetInternalTensor(tflite::kLstmOutputGateBiasTensor),
+      // Recurrent FC
+      kernel_content.HiddenStateTensor(),
+      kernel_content.GetInternalTensor(
+          tflite::kLstmRecurrentToOutputWeightsTensor),
+      /*recurrent_bias*/ nullptr,
+      // Output
+      output_gate_output,
+      // Scratch arrays
+      gate_internal_buffer, kTfLiteActSigmoid);
+
+  CellType* tanh_activated_cell_buffer = buffers.buffer0;  // reuse buffer
+  tflite::lstm_internal::UpdateLstmHidden<CellType, ActivationType>(
+      step_info, kernel_content.CellStateTensor(),
+      kernel_content.HiddenStateTensor(), output_gate_output,
+      inter_gate_params.output_mul_params,
+      op_data.cell_state_info.cell_state_scale_power,
+      tanh_activated_cell_buffer);
+
+  /*Step4: copy the updated hidden state to the output*/
+  // Check offset validity to avoid memory overflow
+  TFLITE_DCHECK_LE(
+      step_info.OutputOffset() + step_info.StateShape().FlatSize(),
+      tflite::micro::GetTensorShape(kernel_content.output_tensor).FlatSize());
+  // record the output (from the updated hidden state)
+  ActivationType* output_ptr = tflite::micro::GetTensorData<ActivationType>(
+      kernel_content.output_tensor);
+  const auto* hidden_state = kernel_content.HiddenStateTensor();
+  std::memcpy(output_ptr + step_info.OutputOffset(),
+              tflite::micro::GetTensorData<ActivationType>(hidden_state) +
+                  step_info.HiddenStateOffset(),
+              step_info.StateShape().FlatSize() * sizeof(ActivationType));
+}
+
+}  // namespace lstm_internal
+
+// Evaluate the LSTM kernel with (potentially) multiple time steps and
+// multi-batch input.
+template <typename ActivationType, typename WeightType, typename CellType,
+          typename BiasType>
+TfLiteStatus EvalLstm(const OpDataLSTM& op_data,
+                      LSTMKernelContents& kernel_content,
+                      LSTMBuffers<CellType>& buffers) {
+  lstm_internal::LstmStepManager step_info(&op_data.size_info);
+  const auto& size_info = op_data.size_info;
+  // Time is the first dimension, which enables batch computation
+  if (size_info.time_major) {
+    for (int t = 0; t < size_info.time_steps; t++) {
+      lstm_internal::LstmStep<ActivationType, WeightType, CellType, BiasType>(
+          step_info, op_data, kernel_content, buffers);
+      // prepare for the next time step
+      step_info.UpdateTime();
+    }
+  } else {
+    // Batch first: the input data cannot be sliced per time step, so run
+    // single-batch inference
+    for (int b = 0; b < size_info.batch_size; b++) {
+      for (int t = 0; t < size_info.time_steps; t++) {
+        lstm_internal::LstmStep<ActivationType, WeightType, CellType,
+                                BiasType>(step_info, op_data, kernel_content,
+                                          buffers);
+        // prepare for the next time step
+        step_info.UpdateTime();
+      }
+      // prepare for the next batch
+      step_info.UpdateBatch();
+      step_info.ResetTime();
+    }
+  }
+  return kTfLiteOk;
+}
 }  // namespace tflite
-#endif  // TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_H_
+
+#endif  // TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_16ACT_H_
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval_test.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval_test.h
new file mode 100644
index 000000000..aee12cf39
--- /dev/null
+++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval_test.h
@@ -0,0 +1,817 @@
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_TEST_H_
+#define TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_TEST_H_
+
+#include <algorithm>
+#include <limits>
+
+#include "tensorflow/lite/micro/kernels/lstm_eval.h"
+#include "tensorflow/lite/micro/kernels/testdata/lstm_test_data.h"
+#include "tensorflow/lite/micro/test_helpers.h"
+#include "tensorflow/lite/micro/testing/micro_test.h"
+
+namespace tflite {
+namespace testing {
+
+/*Helper Functions (mainly about mimicking the kernel preparation)*/
+
+// Create fully connected parameters using quantization settings of input and
+// weight tensors.
+// Since TfLiteContext is not available during the kernel test, here we mimic
+// (put into stack memory) CalculateOpDataFullyConnected in
+// tensorflow/lite/micro/kernels/fully_connected_common.cc
+template <typename CellType>
+tflite::FullyConnectedParams CreateFCParams(
+    const TensorQuantizationParameters& input_quant_params,
+    const TensorQuantizationParameters& weight_quant_params,
+    const float nonlinear_activation_input_scale) {
+  OpDataFullyConnected data;
+  const double input_product_scale =
+      input_quant_params.scale * weight_quant_params.scale;
+  double effective_scale =
+      input_product_scale /
+      static_cast<double>(nonlinear_activation_input_scale);
+
+  QuantizeMultiplier(effective_scale, &data.output_multiplier,
+                     &data.output_shift);
+
+  data.input_zero_point = input_quant_params.zero_point;
+
+  data.filter_zero_point = 0;  // symmetrically quantized
+  data.output_zero_point = 0;  // symmetrically quantized
+
+  data.output_activation_min = std::numeric_limits<CellType>::min();
+  data.output_activation_max = std::numeric_limits<CellType>::max();
+
+  return tflite::FullyConnectedParamsQuantized(data);
+}
+
+inline tflite::FullyConnectedParams CreateFCParamsFloat() {
+  FullyConnectedParams op_params;
+  CalculateActivationRange(kTfLiteActNone, &op_params.float_activation_min,
+                           &op_params.float_activation_max);
+  return op_params;
+}
+
+// Wrapper function to create gate parameters for the four internal LSTM gates
+template <typename CellType>
+tflite::GateParameters CreateGateParams(
+    const TensorQuantizationParameters& input_quant_params,
+    const TensorQuantizationParameters& hidden_state_quant_params,
+    const GateQuantizationParameters& gate_quantization_settings,
+    const float nonlinear_activation_input_scale) {
+  tflite::GateParameters gate_params = {};
+  gate_params.input_fc_params = CreateFCParams<CellType>(
+      input_quant_params, gate_quantization_settings.activation_weight,
+      nonlinear_activation_input_scale);
+  gate_params.recurrent_fc_params = CreateFCParams<CellType>(
+      hidden_state_quant_params, gate_quantization_settings.recurrent_weight,
+      nonlinear_activation_input_scale);
+  return gate_params;
+}
+
+inline tflite::GateParameters CreateGateParamsFloat() {
+  tflite::GateParameters gate_params = {};
+  gate_params.input_fc_params = CreateFCParamsFloat();
+  gate_params.recurrent_fc_params = CreateFCParamsFloat();
+  return gate_params;
+}
+// Create parameters for the element-wise multiplications that happen in a)
+// the cell state update and b) the hidden state update.
+// Note that all the gate outputs are symmetrically quantized, so only scales
+// are required for the inputs. However, during the hidden state update phase,
+// the output is the updated hidden state, which is asymmetrically quantized.
+// Thus the output may require a zero point.
+template <typename OutputType>
+tflite::ArithmeticParams CreateInterGateMulParams(const float input1_scale,
+                                                  const float input2_scale,
+                                                  const float output_scale,
+                                                  const int output_zp = 0) {
+  tflite::ArithmeticParams op_params = {};
+  op_params.quantized_activation_min = std::numeric_limits<OutputType>::min();
+  op_params.quantized_activation_max = std::numeric_limits<OutputType>::max();
+  op_params.input1_offset = 0;
+  op_params.input2_offset = 0;
+  op_params.output_offset = output_zp;
+
+  const double input_product_scale =
+      static_cast<double>(input1_scale) * static_cast<double>(input2_scale);
+  double effective_scale =
+      input_product_scale / static_cast<double>(output_scale);
+
+  QuantizeMultiplier(effective_scale, &op_params.output_multiplier,
+                     &op_params.output_shift);
+  return op_params;
+}
+
+inline tflite::ArithmeticParams CreateInterGateMulParamsFloat() {
+  tflite::ArithmeticParams op_params = {};
+  CalculateActivationRange(kTfLiteActNone, &op_params.float_activation_min,
+                           &op_params.float_activation_max);
+  return op_params;
+}
+
+// Create the additional information about the cell state, which includes:
+// cell_state_scale_power: used in integer nonlinear function (e.g., tanh)
+// quantized_cell_clip: quantized cell clip range
+CellStateInfo CreateLstmCellStateInfo(const float cell_state_scale,
+                                      const float cell_clip) {
+  CellStateInfo cell_state_info;
+  // cell_state_scale_power: 2^-cell_state_scale_power = cell state scale
+  int buffer;
+  tflite::CheckedLog2(cell_state_scale, &buffer);
+  cell_state_info.cell_state_scale_power = buffer;
+  // Cell state specifics
+  cell_state_info.cell_clip = cell_clip;
+  cell_state_info.quantized_cell_clip = static_cast<int16_t>(
+      std::min(std::max(static_cast<double>(cell_clip) /
+                            static_cast<double>(cell_state_scale),
+                        -32768.0),
+               32767.0));
+  return cell_state_info;
+}
+
+// Create LSTMKernelContents from LstmNodeContent by copying TfLiteEvalTensor
+// pointers
+template <typename ActivationType, typename WeightType, typename CellType,
+          typename BiasType, int batch_size, int time_steps,
+          int input_dimension, int state_dimension>
+LSTMKernelContents CreateLSTMKernelContent(
+    LstmNodeContent<ActivationType, WeightType, CellType, BiasType, batch_size,
+                    time_steps, input_dimension, state_dimension>&
+        node_contents) {
+  LSTMKernelContents kernel_content;
+  // Point to correct tensors
+  kernel_content.internal_tensors[kLstmInputTensor] =
+      node_contents.GetEvalTensor(kLstmInputTensor);
+  kernel_content.internal_tensors[kLstmInputToInputWeightsTensor] =
+      node_contents.GetEvalTensor(kLstmInputToInputWeightsTensor);
+  kernel_content.internal_tensors[kLstmInputToForgetWeightsTensor] =
+      node_contents.GetEvalTensor(kLstmInputToForgetWeightsTensor);
+  kernel_content.internal_tensors[kLstmInputToCellWeightsTensor] =
+      node_contents.GetEvalTensor(kLstmInputToCellWeightsTensor);
+  kernel_content.internal_tensors[kLstmInputToOutputWeightsTensor] =
+      node_contents.GetEvalTensor(kLstmInputToOutputWeightsTensor);
+  kernel_content.internal_tensors[kLstmRecurrentToInputWeightsTensor] =
+      node_contents.GetEvalTensor(kLstmRecurrentToInputWeightsTensor);
+  kernel_content.internal_tensors[kLstmRecurrentToForgetWeightsTensor] =
+      node_contents.GetEvalTensor(kLstmRecurrentToForgetWeightsTensor);
+  kernel_content.internal_tensors[kLstmRecurrentToCellWeightsTensor] =
+      node_contents.GetEvalTensor(kLstmRecurrentToCellWeightsTensor);
+  kernel_content.internal_tensors[kLstmRecurrentToOutputWeightsTensor] =
+      node_contents.GetEvalTensor(kLstmRecurrentToOutputWeightsTensor);
+  kernel_content.internal_tensors[kLstmInputGateBiasTensor] =
+      node_contents.GetEvalTensor(kLstmInputGateBiasTensor);
+  kernel_content.internal_tensors[kLstmForgetGateBiasTensor] =
+      node_contents.GetEvalTensor(kLstmForgetGateBiasTensor);
+  kernel_content.internal_tensors[kLstmCellGateBiasTensor] =
+      node_contents.GetEvalTensor(kLstmCellGateBiasTensor);
+  kernel_content.internal_tensors[kLstmOutputGateBiasTensor] =
+      node_contents.GetEvalTensor(kLstmOutputGateBiasTensor);
+  kernel_content.internal_tensors[kLstmOutputStateTensor] =
+      node_contents.GetEvalTensor(kLstmOutputStateTensor);
+  kernel_content.internal_tensors[kLstmOutputGateBiasTensor] =
+      node_contents.GetEvalTensor(kLstmOutputGateBiasTensor);
+  kernel_content.internal_tensors[kLstmCellStateTensor] =
+      node_contents.GetEvalTensor(kLstmCellStateTensor);
+  // Not used internal tensors
+  kernel_content.internal_tensors[kLstmCellToInputWeightsTensor] = nullptr;
+  kernel_content.internal_tensors[kLstmCellToForgetWeightsTensor] = nullptr;
+  kernel_content.internal_tensors[kLstmCellToOutputWeightsTensor] = nullptr;
+  kernel_content.internal_tensors[kLstmProjectionWeightsTensor] = nullptr;
+  kernel_content.internal_tensors[kLstmProjectionBiasTensor] = nullptr;
+  kernel_content.internal_tensors[kLstmInputLayerNormCoefficientsTensor] =
+      nullptr;
+  kernel_content.internal_tensors[kLstmForgetLayerNormCoefficientsTensor] =
+      nullptr;
+  kernel_content.internal_tensors[kLstmInputLayerNormCoefficientsTensor] =
+      nullptr;
+  kernel_content.internal_tensors[kLstmCellLayerNormCoefficientsTensor] =
+      nullptr;
+  kernel_content.internal_tensors[kLstmOutputLayerNormCoefficientsTensor] =
+      nullptr;
+  // Output tensor
+  kernel_content.output_tensor = node_contents.OutputEvalTensor();
+  return kernel_content;
+}
+
+// Deduce the size information (Batch (B), Time Steps (T), Input dimension (I),
+// State dimension (S)) that defines the LSTM using the input and hidden state
+// tensors
+LstmSizeInfo CreateLstmSizeInfo(
+    const bool time_major, const TfLiteIntArray* input_tensor_shape,
+    const TfLiteIntArray* hidden_state_tensor_shape) {
+  LstmSizeInfo size_info;
+  size_info.time_major = time_major;
+  size_info.batch_size =
+      time_major ? input_tensor_shape->data[1] : input_tensor_shape->data[0];
+  size_info.time_steps =
+      time_major ? input_tensor_shape->data[0] : input_tensor_shape->data[1];
+  size_info.input_dimension = input_tensor_shape->data[2];
+  size_info.state_dimension = hidden_state_tensor_shape->data[1];
+  return size_info;
+}
+
+// Create the LstmOpData using the LstmNodeContent and
+// NodeQuantizationParameters (defined in test_data/lstm_test_data). During the
+// actual inference phase, OpDataLSTM is created using information from the
+// flatbuffer file.
+// The test divides the complete LSTM node information into LstmNodeContent
+// and NodeQuantizationParameters for ease of construction.
+template <typename ActivationType, typename WeightType, typename CellType,
+          typename BiasType, int batch_size, int time_steps,
+          int input_dimension, int state_dimension>
+OpDataLSTM CreateLstmOpData(
+    LstmNodeContent<ActivationType, WeightType, CellType, BiasType, batch_size,
+                    time_steps, input_dimension, state_dimension>&
+        node_contents) {
+  const auto& builtin_data = node_contents.BuiltinData();
+  const auto& quantization_settings = node_contents.QuantizationSettings();
+  OpDataLSTM op_data;
+
+  op_data.cell_gate_nonlinear_type = builtin_data.activation;
+  op_data.size_info =
+      CreateLstmSizeInfo(builtin_data.time_major,
+                         node_contents.GetEvalTensor(kLstmInputTensor)->dims,
+                         node_contents.HiddenStateEvalTensor()->dims);
+
+  op_data.cell_state_info = CreateLstmCellStateInfo(
+      quantization_settings.cell_state.scale, builtin_data.cell_clip);
+
+  // Gate Parameters
+  op_data.forget_gate_parameters = CreateGateParams<CellType>(
+      quantization_settings.input, quantization_settings.hidden_state,
+      quantization_settings.forget_gate,
+      quantization_settings.nonlinear_activation_input_scale);
+  op_data.input_gate_parameters = CreateGateParams<CellType>(
+      quantization_settings.input, quantization_settings.hidden_state,
+      quantization_settings.input_gate,
+      quantization_settings.nonlinear_activation_input_scale);
+  op_data.cell_gate_parameters = CreateGateParams<CellType>(
+      quantization_settings.input, quantization_settings.hidden_state,
+      quantization_settings.cell_gate,
+      quantization_settings.nonlinear_activation_input_scale);
+  op_data.output_gate_parameters = CreateGateParams<CellType>(
+      quantization_settings.input, quantization_settings.hidden_state,
+      quantization_settings.output_gate,
+      quantization_settings.nonlinear_activation_input_scale);
+  // Inter gate multiplication parameters
+  op_data.inter_gate_parameters.forget_cell_mul_params =
+      CreateInterGateMulParams<CellType>(
+          quantization_settings.nonlinear_activation_output_scale,
+          quantization_settings.cell_state.scale,
+          quantization_settings.cell_state.scale);
+  op_data.inter_gate_parameters.input_mul_params =
+      CreateInterGateMulParams<CellType>(
+          quantization_settings.nonlinear_activation_output_scale,
+          quantization_settings.nonlinear_activation_output_scale,
+          quantization_settings.cell_state.scale);
+  op_data.inter_gate_parameters.output_mul_params =
+      CreateInterGateMulParams<ActivationType>(
+          quantization_settings.nonlinear_activation_output_scale,
+          quantization_settings.nonlinear_activation_output_scale,
+          quantization_settings.hidden_state.scale,
+          quantization_settings.hidden_state.zero_point);
+  return op_data;
+}
+
+template <int batch_size, int time_steps, int input_dimension,
+          int state_dimension>
+OpDataLSTM CreateLstmOpDataFloat(
+    LstmNodeContent<float, float, float, float, batch_size, time_steps,
+                    input_dimension, state_dimension>& node_contents) {
+  const auto& builtin_data = node_contents.BuiltinData();
+  OpDataLSTM op_data;
+
+  op_data.cell_gate_nonlinear_type = builtin_data.activation;
+  op_data.size_info =
+      CreateLstmSizeInfo(builtin_data.time_major,
+                         node_contents.GetEvalTensor(kLstmInputTensor)->dims,
+                         node_contents.HiddenStateEvalTensor()->dims);
+  op_data.cell_state_info.cell_clip = builtin_data.cell_clip;
+  op_data.cell_state_info.quantized_cell_clip = 0;     // No quantization
+  op_data.cell_state_info.cell_state_scale_power = 0;  // No quantization
+
+  // Gate Parameters
+  op_data.forget_gate_parameters = CreateGateParamsFloat();
+  op_data.input_gate_parameters = CreateGateParamsFloat();
+  op_data.cell_gate_parameters = CreateGateParamsFloat();
+  op_data.output_gate_parameters = CreateGateParamsFloat();
+  // Inter gate multiplication parameters
+  op_data.inter_gate_parameters.forget_cell_mul_params =
+      CreateInterGateMulParamsFloat();
+  op_data.inter_gate_parameters.input_mul_params =
+      CreateInterGateMulParamsFloat();
+  op_data.inter_gate_parameters.output_mul_params =
+      CreateInterGateMulParamsFloat();
+  return op_data;
+}
+
+/*Test Functions Below Here*/
+template <typename T>
+void ValidateResultGoldens(const T* golden, const T* output_data,
+                           const int output_len, const float tolerance) {
+  for (int i = 0; i < output_len; ++i) {
+    TF_LITE_MICRO_EXPECT_NEAR(golden[i], output_data[i], tolerance);
+  }
+}
+
+template <int batch_size, int state_dimension>
+void TestCalculateLstmGateFloat(const TfLiteEvalTensor* input,
+                                const TfLiteEvalTensor* input_weight,
+                                const TfLiteEvalTensor* input_bias,
+                                // Recurrent FC
+                                const TfLiteEvalTensor* recurrent,
+                                const TfLiteEvalTensor* recurrent_weight,
+                                const TfLiteEvalTensor* recurrent_bias,
+                                // Result comparison
+                                TfLiteFusedActivation nonlinear_type,
+                                const float* expected_vals, float tolerance) {
+  float gate_output[batch_size * state_dimension] = {};
+  float fc_output_buffer[batch_size * state_dimension] = {};
+
+  tflite::GateParameters gate_params = CreateGateParamsFloat();
+
+  // Create step information: only one time step, no need to update
+  auto size_info = tflite::testing::CreateLstmSizeInfo(
+      /*time_major*/ false, input->dims, recurrent->dims);
+  // revise time_major = true to enable batch inference
+  size_info.time_major = true;
+  tflite::lstm_internal::LstmStepManager step_info(&size_info);
+
+  tflite::lstm_internal::CalculateLstmGate<float, float, float, float>(
+      step_info, gate_params,
+      // Input FC
+      input, input_weight, input_bias,
+      // Recurrent FC
+      recurrent, recurrent_weight, recurrent_bias,
+      // Output
+      gate_output,
+      // Scratch arrays
+      fc_output_buffer, nonlinear_type);
+
+  ValidateResultGoldens(expected_vals, gate_output,
+                        batch_size * state_dimension, tolerance);
+}
+
+template <typename ActivationType, typename WeightType, typename CellType,
+          typename BiasType, int batch_size, int state_dimension>
+void TestCalculateLstmGateInteger(
+    const TfLiteEvalTensor* input, const TfLiteEvalTensor* input_weight,
+    const TfLiteEvalTensor* input_bias,
+    // Recurrent FC
+    const TfLiteEvalTensor* recurrent, const TfLiteEvalTensor* recurrent_weight,
+    const TfLiteEvalTensor* recurrent_bias,
+    // Quantization settings
+    const NodeQuantizationParameters& node_quantization_settings,
+    const GateQuantizationParameters& gate_quantization_settings,
+    // Result comparison
+    TfLiteFusedActivation nonlinear_type, const float* expected_vals,
+    float tolerance) {
+  CellType gate_output[batch_size * state_dimension] = {};
+  CellType fc_output_buffer[batch_size * state_dimension] = {};
+
+  tflite::GateParameters gate_params = CreateGateParams<CellType>(
+      node_quantization_settings.input, node_quantization_settings.hidden_state,
+      gate_quantization_settings,
+      node_quantization_settings.nonlinear_activation_input_scale);
+
+  // Create step information: only one time step, no need to update
+  auto size_info = tflite::testing::CreateLstmSizeInfo(
+      /*time_major*/ false, input->dims, recurrent->dims);
+  // revise time_major = true to enable batch inference
+  size_info.time_major = true;
+  tflite::lstm_internal::LstmStepManager step_info(&size_info);
+
+  // only int8 weight is supported now
+  tflite::lstm_internal::CalculateLstmGate<ActivationType, WeightType,
+                                           CellType, BiasType>(
+      step_info, gate_params,
+      // Input FC
+      input, input_weight, input_bias,
+      // Recurrent FC
+      recurrent, recurrent_weight, recurrent_bias,
+      // Output
+      gate_output,
+      // Scratch arrays
+      fc_output_buffer, nonlinear_type);
+
+  float gate_output_float[batch_size * state_dimension] = {};
+  Dequantize(gate_output, batch_size * state_dimension,
+             node_quantization_settings.nonlinear_activation_output_scale, 0,
+             gate_output_float);
+
+  ValidateResultGoldens(expected_vals, gate_output_float,
+                        batch_size * state_dimension, tolerance);
+}
+
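// --------------------------------------------------------------------------
// Editor's illustrative aside (not part of the patch). The helpers above fold
// three float scales into one "effective scale" and hand it to
// QuantizeMultiplier, which splits it into a Q31 integer multiplier plus a
// power-of-two shift so the kernels can requantize without floating point.
// The sketch below mimics that decomposition with std::frexp; the scale
// values are made up, and the renormalization step is an assumption based on
// TFLite's documented fixed-point scheme rather than a copy of its code.
// --------------------------------------------------------------------------
// #include <cmath>
// #include <cstdint>
// #include <cstdio>
//
// int main() {
//   const double input_scale = 0.003, weight_scale = 0.005;
//   const double output_scale = 0.007;
//   const double effective_scale = input_scale * weight_scale / output_scale;
//
//   int shift = 0;
//   const double mantissa = std::frexp(effective_scale, &shift);  // [0.5, 1)
//   int64_t q = std::llround(mantissa * (1ll << 31));  // round to Q31
//   if (q == (1ll << 31)) {  // rounding overflowed the mantissa: renormalize
//     q /= 2;
//     ++shift;
//   }
//   const int32_t multiplier = static_cast<int32_t>(q);
//   // At runtime the kernel then computes, in integer arithmetic only,
//   // roughly: out ~= RoundingHighMul(in, multiplier) scaled by 2^shift.
//   std::printf("multiplier=%ld shift=%d\n", static_cast<long>(multiplier),
//               shift);
//   return 0;
// }
// --------------------------------------------------------------------------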
+template <int batch_size, int time_steps, int input_dimension,
+          int state_dimension>
+void TestUpdateLstmCellFloat(
+    const GateOutputCheckData<batch_size * input_dimension,
+                              batch_size * state_dimension>& gate_output_data,
+    LstmNodeContent<float, float, float, float, batch_size, time_steps,
+                    input_dimension, state_dimension>& node_content,
+    const float tolerance) {
+  float buffer[batch_size * state_dimension] = {};
+
+  auto forget_cell_mul_params = CreateInterGateMulParamsFloat();
+  auto input_mul_params = CreateInterGateMulParamsFloat();
+
+  auto cell_state = node_content.CellStateEvalTensor();
+  // Create step information: only one time step, no need to update
+  auto size_info = tflite::testing::CreateLstmSizeInfo(
+      /*time_major*/ false,
+      node_content.GetEvalTensor(tflite::kLstmInputTensor)->dims,
+      node_content.HiddenStateEvalTensor()->dims);
+  // revise time_major = true to enable batch inference
+  size_info.time_major = true;
+  tflite::lstm_internal::LstmStepManager step_info(&size_info);
+
+  // copy the data since it will be updated
+  float forget_gate[batch_size * state_dimension] = {};
+  std::memcpy(forget_gate, gate_output_data.expected_forget_gate_output,
+              batch_size * state_dimension * sizeof(float));
+
+  CellStateInfo cell_state_info;
+  cell_state_info.cell_clip = node_content.BuiltinData().cell_clip;
+  // Call the function to be tested
+  tflite::lstm_internal::UpdateLstmCell<float>(
+      step_info, cell_state, forget_gate,
+      gate_output_data.expected_input_gate_output,
+      gate_output_data.expected_cell_gate_output, forget_cell_mul_params,
+      input_mul_params, cell_state_info, buffer);
+
+  ValidateResultGoldens(gate_output_data.expected_updated_cell,
+                        tflite::micro::GetTensorData<float>(cell_state),
+                        batch_size * state_dimension, tolerance);
+}
+
+template <typename ActivationType, typename WeightType, typename CellType,
+          typename BiasType, int batch_size, int time_steps,
+          int input_dimension, int state_dimension>
+void TestUpdateLstmCellInteger(
+    const GateOutputCheckData<batch_size * input_dimension,
+                              batch_size * state_dimension>& gate_output_data,
+    LstmNodeContent<ActivationType, WeightType, CellType, BiasType, batch_size,
+                    time_steps, input_dimension, state_dimension>& node_content,
+    const float tolerance) {
+  const auto& quantization_settings = node_content.QuantizationSettings();
+  CellType quantized_forget_gate[batch_size * state_dimension] = {};
+  tflite::Quantize(gate_output_data.expected_forget_gate_output,
+                   quantized_forget_gate, batch_size * state_dimension,
+                   quantization_settings.nonlinear_activation_output_scale, 0);
+
+  CellType quantized_input_gate[batch_size * state_dimension] = {};
+  tflite::Quantize(gate_output_data.expected_input_gate_output,
+                   quantized_input_gate, batch_size * state_dimension,
+                   quantization_settings.nonlinear_activation_output_scale, 0);
+
+  CellType quantized_cell_gate[batch_size * state_dimension] = {};
+  tflite::Quantize(gate_output_data.expected_cell_gate_output,
+                   quantized_cell_gate, batch_size * state_dimension,
+                   quantization_settings.nonlinear_activation_output_scale, 0);
+
+  CellType buffer[batch_size * state_dimension] = {};
+
+  auto forget_cell_mul_params = CreateInterGateMulParams<CellType>(
+      quantization_settings.nonlinear_activation_output_scale,
+      quantization_settings.cell_state.scale,
+      quantization_settings.cell_state.scale);
+  auto input_mul_params = CreateInterGateMulParams<CellType>(
+      quantization_settings.nonlinear_activation_output_scale,
+      quantization_settings.nonlinear_activation_output_scale,
+      quantization_settings.cell_state.scale);
+
+  auto cell_state_info =
+      CreateLstmCellStateInfo(quantization_settings.cell_state.scale,
+                              node_content.BuiltinData().cell_clip);
+
+  auto cell_state = node_content.CellStateEvalTensor();
+  // Create step information: only one time step, no need to update
+  auto size_info = tflite::testing::CreateLstmSizeInfo(
+      /*time_major*/ false,
+      node_content.GetEvalTensor(tflite::kLstmInputTensor)->dims,
+      node_content.HiddenStateEvalTensor()->dims);
+  // revise time_major = true to enable batch inference
+  size_info.time_major = true;
+  tflite::lstm_internal::LstmStepManager step_info(&size_info);
+
+  // Call the function to be tested
+  tflite::lstm_internal::UpdateLstmCell<CellType>(
+      step_info, cell_state, quantized_forget_gate, quantized_input_gate,
+      quantized_cell_gate, forget_cell_mul_params, input_mul_params,
+      cell_state_info, buffer);
+
+  float cell_state_float[batch_size * state_dimension] = {};
+  Dequantize(tflite::micro::GetTensorData<CellType>(cell_state),
+             batch_size * state_dimension,
+             quantization_settings.cell_state.scale,
+             quantization_settings.cell_state.zero_point, cell_state_float);
+
+  ValidateResultGoldens(gate_output_data.expected_updated_cell,
+                        cell_state_float, batch_size * state_dimension,
+                        tolerance);
+}
+
+template <int batch_size, int time_steps, int input_dimension,
+          int state_dimension>
+void TestUpdateLstmHiddenFloat(
+    const GateOutputCheckData<batch_size * input_dimension,
+                              batch_size * state_dimension>& gate_output_data,
+    LstmNodeContent<float, float, float, float, batch_size, time_steps,
+                    input_dimension, state_dimension>& node_content,
+    const float tolerance) {
+  float buffer[batch_size * state_dimension] = {};
+
+  auto mul_params = CreateInterGateMulParamsFloat();
+
+  int32_t cell_state_scale_power = 0;
+
+  // Create step information: only one time step, no need to update
+  auto size_info = tflite::testing::CreateLstmSizeInfo(
+      /*time_major*/ false,
+      node_content.GetEvalTensor(tflite::kLstmInputTensor)->dims,
+      node_content.HiddenStateEvalTensor()->dims);
+  // revise time_major = true to enable batch inference
+  size_info.time_major = true;
+  tflite::lstm_internal::LstmStepManager step_info(&size_info);
+
+  auto cell_state = node_content.CellStateEvalTensor();
+  auto hidden_state = node_content.HiddenStateEvalTensor();
+
+  tflite::lstm_internal::UpdateLstmHidden<float, float>(
+      step_info, cell_state, hidden_state,
+      gate_output_data.expected_output_gate_output, mul_params,
+      cell_state_scale_power, buffer);
+
+  ValidateResultGoldens(gate_output_data.expected_updated_hidden,
+                        tflite::micro::GetTensorData<float>(hidden_state),
+                        batch_size * state_dimension, tolerance);
+}
+
+template <typename ActivationType, typename WeightType, typename CellType,
+          typename BiasType, int batch_size, int time_steps,
+          int input_dimension, int state_dimension>
+void TestUpdateLstmHiddenInteger(
+    const GateOutputCheckData<batch_size * input_dimension,
+                              batch_size * state_dimension>& gate_output_data,
+    LstmNodeContent<ActivationType, WeightType, CellType, BiasType, batch_size,
+                    time_steps, input_dimension, state_dimension>& node_content,
+    const float tolerance) {
+  const auto& quantization_settings = node_content.QuantizationSettings();
+  CellType quantized_output_gate[batch_size * state_dimension] = {};
+  tflite::Quantize(gate_output_data.expected_output_gate_output,
+                   quantized_output_gate, batch_size * state_dimension,
+                   quantization_settings.nonlinear_activation_output_scale, 0);
+
+  CellType buffer[batch_size * state_dimension] = {};
+
+  auto mul_params = CreateInterGateMulParams<ActivationType>(
+      quantization_settings.nonlinear_activation_output_scale,
+      quantization_settings.nonlinear_activation_output_scale,
+      quantization_settings.hidden_state.scale,
+      quantization_settings.hidden_state.zero_point);
+
+  int cell_state_scale_power_buffer;
+  tflite::CheckedLog2(quantization_settings.cell_state.scale,
+                      &cell_state_scale_power_buffer);
+  int32_t cell_state_scale_power = cell_state_scale_power_buffer;
+
+  // Create step information: only one time step, no need to update
+  auto size_info = tflite::testing::CreateLstmSizeInfo(
+      /*time_major*/ false,
+      node_content.GetEvalTensor(tflite::kLstmInputTensor)->dims,
+      node_content.HiddenStateEvalTensor()->dims);
+  // revise time_major = true to enable batch inference
+  size_info.time_major = true;
+  tflite::lstm_internal::LstmStepManager step_info(&size_info);
+
+  auto cell_state = node_content.CellStateEvalTensor();
+  auto hidden_state = node_content.HiddenStateEvalTensor();
+
+  tflite::lstm_internal::UpdateLstmHidden<CellType, ActivationType>(
+      step_info, cell_state, hidden_state, quantized_output_gate, mul_params,
+      cell_state_scale_power, buffer);
+
+  float hidden_state_float[batch_size * state_dimension] = {};
+  Dequantize(tflite::micro::GetTensorData<ActivationType>(hidden_state),
+             batch_size * state_dimension,
+             quantization_settings.hidden_state.scale,
+             quantization_settings.hidden_state.zero_point,
+             hidden_state_float);
+
+  ValidateResultGoldens(gate_output_data.expected_updated_hidden,
+                        hidden_state_float, batch_size * state_dimension,
+                        tolerance);
+}
+
+template <int batch_size, int time_steps, int input_dimension,
+          int state_dimension>
+void TestLstmStepFloat(
+    const GateOutputCheckData<batch_size * input_dimension,
+                              batch_size * state_dimension>& gate_output_data,
+    const float hidden_state_tolerance, const float cell_state_tolerance,
+    /*can not be const, state will be updated*/
+    LstmNodeContent<float, float, float, float, batch_size, time_steps,
+                    input_dimension, state_dimension>& node_contents) {
+  // Mimicking the kernel preparation phase, node_contents approximates the
+  // node
+  LSTMKernelContents kernel_content = CreateLSTMKernelContent(node_contents);
+  LSTMBuffers<float> buffers;
+  // Scratch buffers on the stack
+  float buffer0[batch_size * state_dimension] = {};
+  buffers.buffer0 = buffer0;
+  float buffer1[batch_size * state_dimension] = {};
+  buffers.buffer1 = buffer1;
+  float buffer2[batch_size * state_dimension] = {};
+  buffers.buffer2 = buffer2;
+  float buffer3[batch_size * state_dimension] = {};
+  buffers.buffer3 = buffer3;
+
+  OpDataLSTM op_data = CreateLstmOpDataFloat(node_contents);
+  // set time_major to true to test batch inference
+  op_data.size_info.time_major = true;
+  tflite::lstm_internal::LstmStepManager step_info(&op_data.size_info);
+  tflite::lstm_internal::LstmStep<float, float, float, float>(
+      step_info, op_data, kernel_content, buffers);
+
+  ValidateResultGoldens(
+      gate_output_data.expected_updated_hidden,
+      tflite::micro::GetTensorData<float>(kernel_content.HiddenStateTensor()),
+      batch_size * state_dimension, hidden_state_tolerance);
+  ValidateResultGoldens(
+      gate_output_data.expected_updated_cell,
+      tflite::micro::GetTensorData<float>(kernel_content.CellStateTensor()),
+      batch_size * state_dimension, cell_state_tolerance);
+}
+
+template <typename ActivationType, typename WeightType, typename CellType,
+          typename BiasType, int batch_size, int time_steps,
+          int input_dimension, int state_dimension>
+void TestLstmStepInteger(
+    const GateOutputCheckData<batch_size * input_dimension,
+                              batch_size * state_dimension>& gate_output_data,
+    const float hidden_state_tolerance, const float cell_state_tolerance,
+    /*can not be const, state will be updated*/
+    LstmNodeContent<ActivationType, WeightType, CellType, BiasType, batch_size,
+                    time_steps, input_dimension, state_dimension>&
+        node_contents) {
+  // Mimicking the kernel preparation phase, node_contents approximates the
+  // node
+  LSTMKernelContents kernel_content = CreateLSTMKernelContent(node_contents);
+  LSTMBuffers<CellType> buffers;
+
+  // Scratch buffers on the stack
+  CellType buffer0[batch_size * state_dimension] = {};
+  buffers.buffer0 = buffer0;
+  CellType buffer1[batch_size * state_dimension] = {};
+  buffers.buffer1 = buffer1;
+  CellType buffer2[batch_size * state_dimension] = {};
+  buffers.buffer2 = buffer2;
+  CellType buffer3[batch_size * state_dimension] = {};
+  buffers.buffer3 = buffer3;
+
+  OpDataLSTM op_data = CreateLstmOpData(node_contents);
+  // set time_major to true to test batch inference
+  op_data.size_info.time_major = true;
+  tflite::lstm_internal::LstmStepManager step_info(&op_data.size_info);
+  tflite::lstm_internal::LstmStep<ActivationType, WeightType, CellType,
+                                  BiasType>(step_info, op_data, kernel_content,
+                                            buffers);
+
+  const auto& quantization_settings = node_contents.QuantizationSettings();
+  float dequantized_hidden_state[batch_size * state_dimension] = {};
+  Dequantize(
+      tflite::micro::GetTensorData<ActivationType>(
+          kernel_content.HiddenStateTensor()),
+      batch_size * state_dimension, quantization_settings.hidden_state.scale,
+      quantization_settings.hidden_state.zero_point, dequantized_hidden_state);
+
+  float dequantized_cell_state[batch_size * state_dimension] = {};
+  Dequantize(
+      tflite::micro::GetTensorData<CellType>(kernel_content.CellStateTensor()),
+      batch_size * state_dimension,
+      quantization_settings.cell_state.scale,
+      quantization_settings.cell_state.zero_point, dequantized_cell_state);
+
+  ValidateResultGoldens(gate_output_data.expected_updated_hidden,
+                        dequantized_hidden_state, batch_size * state_dimension,
+                        hidden_state_tolerance);
+  ValidateResultGoldens(gate_output_data.expected_updated_cell,
+                        dequantized_cell_state, batch_size * state_dimension,
+                        cell_state_tolerance);
+}
+
+template <int batch_size, int time_steps, int input_dimension,
+          int state_dimension>
+void TestEvalLstmFloat(
+    const LstmEvalCheckData<
+        batch_size * time_steps * input_dimension, batch_size * state_dimension,
+        batch_size * state_dimension * time_steps>& eval_check_data,
+    const float hidden_state_tolerance, const float cell_state_tolerance,
+    LstmNodeContent<float, float, float, float, batch_size, time_steps,
+                    input_dimension, state_dimension>& node_contents) {
+  // Mimicking the kernel preparation phase, node_contents approximates the
+  // node
+  LSTMKernelContents kernel_content = CreateLSTMKernelContent(node_contents);
+  // Scratch buffers on the stack
+  LSTMBuffers<float> buffers;
+  float buffer0[batch_size * state_dimension] = {};
+  buffers.buffer0 = buffer0;
+  float buffer1[batch_size * state_dimension] = {};
+  buffers.buffer1 = buffer1;
+  float buffer2[batch_size * state_dimension] = {};
+  buffers.buffer2 = buffer2;
+  float buffer3[batch_size * state_dimension] = {};
+  buffers.buffer3 = buffer3;
+
+  OpDataLSTM op_data = CreateLstmOpDataFloat(node_contents);
+
+  tflite::EvalLstm<float, float, float, float>(op_data, kernel_content,
+                                               buffers);
+
+  ValidateResultGoldens(eval_check_data.expected_hidden_state,
+                        node_contents.GetHiddenStateData(),
+                        batch_size * state_dimension, hidden_state_tolerance);
+
+  ValidateResultGoldens(eval_check_data.expected_cell_state,
+                        node_contents.GetCellStateData(),
+                        batch_size * state_dimension, cell_state_tolerance);
+
+  ValidateResultGoldens(eval_check_data.expected_output,
+                        node_contents.GetOutputData(),
+                        batch_size * state_dimension, hidden_state_tolerance);
+}
+
+template <typename ActivationType, typename WeightType, typename CellType,
+          typename BiasType, int batch_size, int time_steps,
+          int input_dimension, int state_dimension>
+void TestEvalLstmInteger(
+    const LstmEvalCheckData<
+        batch_size * time_steps * input_dimension, batch_size * state_dimension,
+        batch_size * state_dimension * time_steps>& eval_check_data,
+    const float hidden_state_tolerance, const float cell_state_tolerance,
+    LstmNodeContent<ActivationType, WeightType, CellType, BiasType, batch_size,
+                    time_steps, input_dimension, state_dimension>&
+        node_contents) {
+  // Mimicking the kernel preparation phase, node_contents approximates the
+  // node
+  LSTMKernelContents kernel_content = CreateLSTMKernelContent(node_contents);
+  // Scratch buffers on the stack
+  LSTMBuffers<CellType> buffers;
+  CellType buffer0[batch_size * state_dimension] = {};
+  buffers.buffer0 = buffer0;
+  CellType buffer1[batch_size * state_dimension] = {};
+  buffers.buffer1 = buffer1;
+  CellType buffer2[batch_size * state_dimension] = {};
+  buffers.buffer2 = buffer2;
+  CellType buffer3[batch_size * state_dimension] = {};
+  buffers.buffer3 = buffer3;
+
+  OpDataLSTM op_data = CreateLstmOpData(node_contents);
+
+  tflite::EvalLstm<ActivationType, WeightType, CellType, BiasType>(
+      op_data, kernel_content, buffers);
+
+  const auto& quantization_settings = node_contents.QuantizationSettings();
+  float dequantized_hidden_state[batch_size * state_dimension] = {};
+  Dequantize(node_contents.GetHiddenStateData(), batch_size * state_dimension,
+             quantization_settings.hidden_state.scale,
+             quantization_settings.hidden_state.zero_point,
+             dequantized_hidden_state);
+
+  ValidateResultGoldens(eval_check_data.expected_hidden_state,
+                        dequantized_hidden_state, batch_size * state_dimension,
+                        hidden_state_tolerance);
+
+  float dequantized_cell_state[batch_size * state_dimension] = {};
+  Dequantize(node_contents.GetCellStateData(), batch_size * state_dimension,
+             quantization_settings.cell_state.scale,
+             quantization_settings.cell_state.zero_point,
+             dequantized_cell_state);
+  ValidateResultGoldens(eval_check_data.expected_cell_state,
+                        dequantized_cell_state, batch_size * state_dimension,
+                        cell_state_tolerance);
+
+  float dequantized_output[batch_size * state_dimension * time_steps] = {};
+  Dequantize(node_contents.GetOutputData(),
+             batch_size * state_dimension * time_steps,
+             quantization_settings.output.scale,
+             quantization_settings.output.zero_point, dequantized_output);
+  ValidateResultGoldens(eval_check_data.expected_output, dequantized_output,
+                        batch_size * state_dimension, hidden_state_tolerance);
+}
+
+}  // namespace testing
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_TEST_H_
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_shared.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_shared.h
index ee34b8489..dbdc3c553 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_shared.h
+++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_shared.h
@@ -15,6 +15,9 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_MICRO_KERNELS_LSTM_SHARED_H_
 #define TENSORFLOW_LITE_MICRO_KERNELS_LSTM_SHARED_H_
 
+#include "tensorflow/lite/c/builtin_op_data.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+
 namespace tflite {
 
 // Input Tensors of size {n_batch, n_input}
@@ -63,5 +66,85 @@ constexpr int kLstmOutputLayerNormCoefficientsTensor = 23;  // Optional
 
 // Output tensors.
 constexpr int kLstmOutputTensor = 0;
 
+// Parameters for the two fully connected computations inside each gate
+struct GateParameters {
+  FullyConnectedParams input_fc_params;
+  FullyConnectedParams recurrent_fc_params;
+};
+
+// Parameters for the element-wise multiplications between gate outputs
+struct InterGateParameters {
+  ArithmeticParams forget_cell_mul_params;
+  ArithmeticParams input_mul_params;
+  ArithmeticParams output_mul_params;
+};
+
+// Size information about the LSTM kernel, which is deduced from tensors stored
+// in the flat buffer file.
+struct LstmSizeInfo {
+  bool time_major;
+  int batch_size;
+  int time_steps;
+  int input_dimension;
+  int state_dimension;
+};
+
+// Contains information about the cell state tensor
+struct CellStateInfo {
+  float cell_clip;
+  // Clipping range for the cell state. Only a 16-bit cell is supported (could
+  // be generalized through templatization)
+  int16_t quantized_cell_clip;
+  // 2^-cell_state_scale_power = cell state scale, required by integer tanh
+  // computation
+  int32_t cell_state_scale_power;
+};
+
+// Contains required computation information for LSTM kernel evaluation.
+// Specifically, it includes shape and quantization settings for the LSTM
+// internal operations. Formatted to support operations defined in the
+// tensorflow/lite/kernels/internal/reference/integer_ops
+// Should be constructed during the preparation phase
+struct OpDataLSTM {
+  LstmSizeInfo size_info;
+  CellStateInfo cell_state_info;
+  TfLiteFusedActivation cell_gate_nonlinear_type;
+  GateParameters forget_gate_parameters;
+  GateParameters input_gate_parameters;
+  GateParameters cell_gate_parameters;
+  GateParameters output_gate_parameters;
+  InterGateParameters inter_gate_parameters;
+  int buffer_indices[4];  // TFLM only
+};
+
+// Provides an interface to access the internal tensors and buffers used for
+// LSTM invocation. Constructed during the invocation phase
+struct LSTMKernelContents {
+ public:
+  // Internal tensors, fixed (const); see lstm_shared.h for tensor names
+  const TfLiteEvalTensor* GetInternalTensor(const int tensor_index) const {
+    return internal_tensors[tensor_index];
+  }
+  // Variable tensors (will be changed, can not be const)
+  TfLiteEvalTensor* HiddenStateTensor() {
+    return internal_tensors[kLstmOutputStateTensor];
+  }
+  TfLiteEvalTensor* CellStateTensor() {
+    return internal_tensors[kLstmCellStateTensor];
+  }
+  // Node internal tensors with indexes defined at the beginning of the file
+  TfLiteEvalTensor* internal_tensors[24];
+  TfLiteEvalTensor* output_tensor;
+};
+
+template <typename CellType>
+struct LSTMBuffers {
+  // TFLM buffers require buffer indices from LstmOpData.
+  CellType* buffer0;
+  CellType* buffer1;
+  CellType* buffer2;
+  CellType* buffer3;
+};
+
 }  // namespace tflite
 #endif  // TENSORFLOW_LITE_MICRO_KERNELS_LSTM_SHARED_H_
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/maximum_minimum.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/maximum_minimum.cc
index 1aebdefdc..b7b9cba8f 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/maximum_minimum.cc
+++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/maximum_minimum.cc
@@ -1,4 +1,4 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -26,9 +26,7 @@ limitations under the License.
 #include "tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace maximum_minimum {
+
 namespace {
 
 // This file has a reference implementation of TFMaximum/TFMinimum.
@@ -65,8 +63,6 @@ struct MinimumOp {
   }
 };
 
-}  // namespace
-
 template <typename data_type, typename op_type>
 void TFLiteOperation(TfLiteContext* context, TfLiteNode* node,
                      const OpContext& op_context) {
@@ -111,22 +107,16 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   return kTfLiteOk;
 }
 
-}  // namespace maximum_minimum
+}  // namespace
 
 TfLiteRegistration Register_MAXIMUM() {
-  return tflite::micro::RegisterOp(
-      nullptr, nullptr,
-      maximum_minimum::Eval<maximum_minimum::kReference,
-                            maximum_minimum::MaximumOp>);
+  return tflite::micro::RegisterOp(nullptr, nullptr,
+                                   Eval<kReference, MaximumOp>);
 }
 
 TfLiteRegistration Register_MINIMUM() {
-  return tflite::micro::RegisterOp(
-      nullptr, nullptr,
-      maximum_minimum::Eval<maximum_minimum::kReference,
-                            maximum_minimum::MinimumOp>);
+  return tflite::micro::RegisterOp(nullptr, nullptr,
+                                   Eval<kReference, MinimumOp>);
 }
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/micro_ops.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/micro_ops.h
index df2a8d2c3..3f990f20b 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/micro_ops.h
+++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/micro_ops.h
@@ -1,4 +1,4 @@
-/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -42,48 +42,68 @@ TfLiteRegistration Register_BROADCAST_ARGS();
 TfLiteRegistration Register_BROADCAST_TO();
 TfLiteRegistration Register_CALL_ONCE();
 TfLiteRegistration Register_CAST();
+TfLiteRegistration Register_CEIL();
 // TODO(b/160234179): Change custom OPs to also return by value.
 TfLiteRegistration* Register_CIRCULAR_BUFFER();
+TfLiteRegistration Register_CONCATENATION();
+TfLiteRegistration Register_CONV_2D();
 TfLiteRegistration Register_CUMSUM();
 TfLiteRegistration Register_DEPTH_TO_SPACE();
 TfLiteRegistration Register_DEPTHWISE_CONV_2D();
 TfLiteRegistration Register_DEQUANTIZE();
 TfLiteRegistration Register_DIV();
 TfLiteRegistration Register_ELU();
+TfLiteRegistration Register_EQUAL();
+TfLiteRegistration* Register_ETHOSU();
 TfLiteRegistration Register_EXP();
 TfLiteRegistration Register_EXPAND_DIMS();
 TfLiteRegistration Register_FILL();
+TfLiteRegistration Register_FLOOR();
 TfLiteRegistration Register_FLOOR_DIV();
 TfLiteRegistration Register_FLOOR_MOD();
+TfLiteRegistration Register_FULLY_CONNECTED();
 TfLiteRegistration Register_GATHER();
 TfLiteRegistration Register_GATHER_ND();
+TfLiteRegistration Register_GREATER();
+TfLiteRegistration Register_GREATER_EQUAL();
 TfLiteRegistration Register_HARD_SWISH();
 TfLiteRegistration Register_IF();
 TfLiteRegistration Register_L2_POOL_2D();
 TfLiteRegistration Register_LEAKY_RELU();
+TfLiteRegistration Register_LESS();
+TfLiteRegistration Register_LESS_EQUAL();
 TfLiteRegistration Register_LOG_SOFTMAX();
 TfLiteRegistration Register_LOGICAL_AND();
 TfLiteRegistration Register_LOGICAL_OR();
 TfLiteRegistration Register_LOGISTIC();
 TfLiteRegistration Register_MAX_POOL_2D();
+TfLiteRegistration Register_MAXIMUM();
+TfLiteRegistration Register_MEAN();
+TfLiteRegistration Register_MINIMUM();
 TfLiteRegistration Register_MIRROR_PAD();
-TfLiteRegistration Register_NEG();
-TfLiteRegistration Register_PRELU();
 TfLiteRegistration Register_MUL();
+TfLiteRegistration Register_NEG();
+TfLiteRegistration Register_NOT_EQUAL();
+TfLiteRegistration Register_PACK();
 TfLiteRegistration Register_PAD();
 TfLiteRegistration Register_PADV2();
+TfLiteRegistration Register_PRELU();
 TfLiteRegistration Register_QUANTIZE();
 TfLiteRegistration Register_READ_VARIABLE();
+TfLiteRegistration Register_REDUCE_MAX();
 TfLiteRegistration Register_RELU();
 TfLiteRegistration Register_RELU6();
 TfLiteRegistration Register_RESIZE_BILINEAR();
 TfLiteRegistration Register_SELECT_V2();
 TfLiteRegistration Register_SHAPE();
 TfLiteRegistration Register_SLICE();
+TfLiteRegistration Register_SOFTMAX();
 TfLiteRegistration Register_SPACE_TO_BATCH_ND();
 TfLiteRegistration Register_SPACE_TO_DEPTH();
+TfLiteRegistration Register_SPLIT_V();
 TfLiteRegistration Register_SQUARED_DIFFERENCE();
 TfLiteRegistration Register_SQUEEZE();
+TfLiteRegistration Register_STRIDED_SLICE();
 TfLiteRegistration Register_SUB();
 TfLiteRegistration Register_SUM();
 TfLiteRegistration Register_SVDF();
@@ -99,31 +119,17 @@ namespace ops {
 namespace micro {
 
 TfLiteRegistration Register_ABS();
-TfLiteRegistration Register_CEIL();
-TfLiteRegistration Register_CONCATENATION();
 TfLiteRegistration Register_COS();
-TfLiteRegistration Register_EQUAL();
-TfLiteRegistration Register_FLOOR();
-TfLiteRegistration Register_GREATER();
-TfLiteRegistration Register_GREATER_EQUAL();
-TfLiteRegistration Register_LESS();
-TfLiteRegistration Register_LESS_EQUAL();
 TfLiteRegistration Register_LOG();
 TfLiteRegistration Register_LOGICAL_NOT();
-TfLiteRegistration Register_MAXIMUM();
-TfLiteRegistration Register_MINIMUM();
-TfLiteRegistration Register_NOT_EQUAL();
-TfLiteRegistration Register_PACK();
 TfLiteRegistration Register_RESHAPE();
 TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR();
 TfLiteRegistration Register_ROUND();
 TfLiteRegistration Register_RSQRT();
 TfLiteRegistration Register_SIN();
 TfLiteRegistration Register_SPLIT();
-TfLiteRegistration Register_SPLIT_V();
 TfLiteRegistration Register_SQRT();
 TfLiteRegistration Register_SQUARE();
-TfLiteRegistration Register_STRIDED_SLICE();
 TfLiteRegistration Register_UNPACK();
 TfLiteRegistration Register_L2_NORMALIZATION();
 TfLiteRegistration Register_TANH();
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/mul_common.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/mul_common.cc
index dd5dfc40c..45e7c1e4c 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/mul_common.cc
+++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/mul_common.cc
@@ -1,4 +1,4 @@
-/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -54,7 +54,7 @@ TfLiteStatus CalculateOpDataMul(TfLiteContext* context, TfLiteNode* node,
 
   TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
 
-  if (output->type == kTfLiteInt8) {
+  if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
     TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
         context, params->activation, output, &data->output_activation_min,
         &data->output_activation_max));
@@ -68,6 +68,12 @@ TfLiteStatus CalculateOpDataMul(TfLiteContext* context, TfLiteNode* node,
     data->input1_zero_point = input1->params.zero_point;
     data->input2_zero_point = input2->params.zero_point;
     data->output_zero_point = output->params.zero_point;
+
+    if (input1->type == kTfLiteInt16) {
+      TF_LITE_ENSURE_EQ(context, data->input1_zero_point, 0);
+      TF_LITE_ENSURE_EQ(context, data->input2_zero_point, 0);
+      TF_LITE_ENSURE_EQ(context, data->output_zero_point, 0);
+    }
   } else if (output->type == kTfLiteInt32) {
     CalculateActivationRange(params->activation, &data->output_activation_min,
                              &data->output_activation_max);
@@ -148,9 +154,9 @@ TfLiteStatus EvalMulQuantizedReference(TfLiteContext* context, TfLiteNode* node,
           tflite::micro::GetTensorData<int8_t>(output));
     }
   } else if (input1->type == kTfLiteInt16) {
-    TF_LITE_ENSURE_EQ(context, op_params.input1_offset, 0.0);
-    TF_LITE_ENSURE_EQ(context, op_params.input2_offset, 0.0);
-    TF_LITE_ENSURE_EQ(context, op_params.output_offset, 0.0);
+    TF_LITE_ENSURE_EQ(context, op_params.input1_offset, 0);
+    TF_LITE_ENSURE_EQ(context, op_params.input2_offset, 0);
+    TF_LITE_ENSURE_EQ(context, op_params.output_offset, 0);
 
     if (need_broadcast) {
       reference_integer_ops::BroadcastMul4DSlow(
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pack.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pack.cc
index 5e322b87b..5a4eb4f53 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pack.cc
+++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pack.cc
@@ -1,4 +1,4 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -20,9 +20,7 @@ limitations under the License.
#include "tensorflow/lite/micro/micro_log.h" namespace tflite { -namespace ops { -namespace micro { -namespace pack { + namespace { constexpr int kOutputTensor = 0; @@ -106,12 +104,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } } // namespace -} // namespace pack TfLiteRegistration Register_PACK() { - return tflite::micro::RegisterOp(nullptr, nullptr, pack::Eval); + return tflite::micro::RegisterOp(nullptr, nullptr, Eval); } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.cc index d9b147ad8..050913c5a 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.cc @@ -1,4 +1,4 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -43,7 +43,12 @@ TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) { AveragePoolingEvalFloat(context, node, params, data, input, output); break; case kTfLiteInt8: - AveragePoolingEvalQuantized(context, node, params, data, input, output); + AveragePoolingEvalQuantized(context, node, params, data, input, + output); + break; + case kTfLiteInt16: + AveragePoolingEvalQuantized(context, node, params, data, input, + output); break; default: MicroPrintf("Input type %s is not currently supported", @@ -71,7 +76,12 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) { MaxPoolingEvalFloat(context, node, params, data, input, output); break; case kTfLiteInt8: - MaxPoolingEvalQuantized(context, node, params, data, input, output); + MaxPoolingEvalQuantized(context, node, params, data, input, + output); + break; + case kTfLiteInt16: + MaxPoolingEvalQuantized(context, node, params, data, input, + output); break; default: MicroPrintf("Type %s not currently supported.", diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.h index 493250ee1..7b322480c 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.h @@ -20,7 +20,14 @@ limitations under the License. 
#include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h" +#include "tensorflow/lite/kernels/internal/reference/pooling.h" +#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/kernels/padding.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" #include "tensorflow/lite/micro/kernels/micro_ops.h" +#include "tensorflow/lite/micro/micro_log.h" namespace tflite { @@ -50,27 +57,69 @@ void AveragePoolingEvalFloat(const TfLiteContext* context, const TfLiteEvalTensor* input, TfLiteEvalTensor* output); +template void AveragePoolingEvalQuantized(TfLiteContext* context, const TfLiteNode* node, const TfLitePoolParams* params, const OpDataPooling* data, const TfLiteEvalTensor* input, - TfLiteEvalTensor* output); + TfLiteEvalTensor* output) { + TFLITE_DCHECK(input->type == kTfLiteInt8 || input->type == kTfLiteInt16); + + PoolParams op_params; + op_params.stride_height = params->stride_height; + op_params.stride_width = params->stride_width; + op_params.filter_height = params->filter_height; + op_params.filter_width = params->filter_width; + op_params.padding_values.height = data->padding.height; + op_params.padding_values.width = data->padding.width; + op_params.quantized_activation_min = data->activation_min; + op_params.quantized_activation_max = data->activation_max; + + reference_integer_ops::AveragePool(op_params, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); +} void MaxPoolingEvalFloat(TfLiteContext* context, TfLiteNode* node, TfLitePoolParams* params, const OpDataPooling* data, const TfLiteEvalTensor* input, TfLiteEvalTensor* output); +template void MaxPoolingEvalQuantized(TfLiteContext* context, TfLiteNode* node, TfLitePoolParams* params, const OpDataPooling* data, const TfLiteEvalTensor* input, - TfLiteEvalTensor* output); + TfLiteEvalTensor* output) { + TFLITE_DCHECK(input->type == kTfLiteInt8 || input->type == kTfLiteInt16); + + tflite::PoolParams op_params; + op_params.stride_height = params->stride_height; + op_params.stride_width = params->stride_width; + op_params.filter_height = params->filter_height; + op_params.filter_width = params->filter_width; + op_params.padding_values.height = data->padding.height; + op_params.padding_values.width = data->padding.width; + op_params.quantized_activation_min = data->activation_min; + op_params.quantized_activation_max = data->activation_max; + + reference_integer_ops::MaxPool(op_params, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); +} #if defined(CMSIS_NN) TfLiteRegistration Register_AVERAGE_POOL_2D_INT8(); TfLiteRegistration Register_MAX_POOL_2D_INT8(); + +TfLiteRegistration Register_AVERAGE_POOL_2D_INT16(); + +TfLiteRegistration Register_MAX_POOL_2D_INT16(); #else inline TfLiteRegistration Register_AVERAGE_POOL_2D_INT8() { return tflite::Register_AVERAGE_POOL_2D(); @@ -79,6 +128,14 @@ inline TfLiteRegistration Register_AVERAGE_POOL_2D_INT8() { inline TfLiteRegistration Register_MAX_POOL_2D_INT8() { return tflite::Register_MAX_POOL_2D(); } + +inline TfLiteRegistration Register_AVERAGE_POOL_2D_INT16() { + return tflite::Register_AVERAGE_POOL_2D(); +} + +inline TfLiteRegistration Register_MAX_POOL_2D_INT16() { + return 
+}
 #endif
 
 }  // namespace tflite
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling_common.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling_common.cc
index ddc18f0bb..b39e9d846 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling_common.cc
+++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling_common.cc
@@ -1,4 +1,4 @@
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -69,10 +69,14 @@ TfLiteStatus PoolingPrepare(TfLiteContext* context, TfLiteNode* node) {
   if (input->type == kTfLiteFloat32) {
     CalculateActivationRange(params->activation, &data->activation_min_f32,
                              &data->activation_max_f32);
-  } else if (input->type == kTfLiteInt8) {
+  } else if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) {
     CalculateActivationRangeQuantized(context, params->activation, output,
                                       &data->activation_min,
                                       &data->activation_max);
+  } else {
+    MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                input->type);
+    return kTfLiteError;
   }
 
   micro_context->DeallocateTempTfLiteTensor(input);
@@ -102,30 +106,6 @@ void AveragePoolingEvalFloat(const TfLiteContext* context,
                           tflite::micro::GetTensorData<float>(output));
 }
 
-void AveragePoolingEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
-                                 const TfLitePoolParams* params,
-                                 const OpDataPooling* data,
-                                 const TfLiteEvalTensor* input,
-                                 TfLiteEvalTensor* output) {
-  TFLITE_DCHECK(input->type == kTfLiteInt8);
-
-  PoolParams op_params;
-  op_params.stride_height = params->stride_height;
-  op_params.stride_width = params->stride_width;
-  op_params.filter_height = params->filter_height;
-  op_params.filter_width = params->filter_width;
-  op_params.padding_values.height = data->padding.height;
-  op_params.padding_values.width = data->padding.width;
-  op_params.quantized_activation_min = data->activation_min;
-  op_params.quantized_activation_max = data->activation_max;
-
-  reference_integer_ops::AveragePool(
-      op_params, tflite::micro::GetTensorShape(input),
-      tflite::micro::GetTensorData<int8_t>(input),
-      tflite::micro::GetTensorShape(output),
-      tflite::micro::GetTensorData<int8_t>(output));
-}
-
 void MaxPoolingEvalFloat(TfLiteContext* context, TfLiteNode* node,
                          TfLitePoolParams* params, const OpDataPooling* data,
                          const TfLiteEvalTensor* input,
@@ -145,26 +125,4 @@ void MaxPoolingEvalFloat(TfLiteContext* context, TfLiteNode* node,
                       tflite::micro::GetTensorData<float>(output));
 }
 
-void MaxPoolingEvalQuantized(TfLiteContext* context, TfLiteNode* node,
-                             TfLitePoolParams* params,
-                             const OpDataPooling* data,
-                             const TfLiteEvalTensor* input,
-                             TfLiteEvalTensor* output) {
-  tflite::PoolParams op_params;
-  op_params.stride_height = params->stride_height;
-  op_params.stride_width = params->stride_width;
-  op_params.filter_height = params->filter_height;
-  op_params.filter_width = params->filter_width;
-  op_params.padding_values.height = data->padding.height;
-  op_params.padding_values.width = data->padding.width;
-  op_params.quantized_activation_min = data->activation_min;
-  op_params.quantized_activation_max = data->activation_max;
-
-  reference_integer_ops::MaxPool(op_params,
-                                 tflite::micro::GetTensorShape(input),
-                                 tflite::micro::GetTensorData<int8_t>(input),
-                                 tflite::micro::GetTensorShape(output),
-                                 tflite::micro::GetTensorData<int8_t>(output));
-}
-
 }  // namespace tflite
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/split_v.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/split_v.cc
index d0002d57c..236536f15 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/split_v.cc
+++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/split_v.cc
@@ -1,4 +1,4 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -22,9 +22,8 @@ limitations under the License.
 #include "tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace split_v {
+
+namespace {
 
 template <typename T>
 TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
@@ -119,12 +118,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   return kTfLiteOk;
 }
 
-}  // namespace split_v
+}  // namespace
 
 TfLiteRegistration Register_SPLIT_V() {
-  return tflite::micro::RegisterOp(nullptr, split_v::Prepare, split_v::Eval);
+  return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
 }
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
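The pack.cc and split_v.cc hunks above, and the strided_slice.cc hunk below, all apply the same cleanup: the nested ops::micro::<kernel> namespaces collapse into a single anonymous namespace, so only the Register_* entry point is visible outside the translation unit. A compilable sketch of the resulting shape (TfLiteRegistration and the registration body here are simplified stand-ins, not the real TFLM types):

#include <cstdio>

struct TfLiteRegistration {
  void (*invoke)();
};

namespace tflite {
namespace {  // file-local; previously tflite::ops::micro::split_v etc.

void Eval() { std::printf("kernel eval\n"); }

}  // namespace

// Only the registration symbol is exported from the translation unit.
TfLiteRegistration Register_SPLIT_V() { return {Eval}; }

}  // namespace tflite

int main() {
  tflite::Register_SPLIT_V().invoke();
  return 0;
}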
#include "tensorflow/lite/micro/micro_log.h" namespace tflite { -namespace ops { -namespace micro { -namespace strided_slice { + +namespace { constexpr int kInputTensor = 0; constexpr int kBeginTensor = 1; @@ -198,13 +197,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } return kTfLiteOk; } -} // namespace strided_slice + +} // namespace TfLiteRegistration Register_STRIDED_SLICE() { - return tflite::micro::RegisterOp(strided_slice::Init, strided_slice::Prepare, - strided_slice::Eval); + return tflite::micro::RegisterOp(Init, Prepare, Eval); } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/sub_common.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/sub_common.cc index 7ad3aa6aa..d6647462f 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/sub_common.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/sub_common.cc @@ -52,14 +52,12 @@ TfLiteStatus CalculateOpDataSub(TfLiteContext* context, TfLiteSubParams* params, const float twice_max_input_scale = 2 * std::max(input1->params.scale, input2->params.scale); const double real_input1_multiplier = - static_cast(input1->params.scale) / - static_cast(twice_max_input_scale); + static_cast(input1->params.scale / twice_max_input_scale); const double real_input2_multiplier = - static_cast(input2->params.scale) / - static_cast(twice_max_input_scale); + static_cast(input2->params.scale / twice_max_input_scale); const double real_output_multiplier = - static_cast(twice_max_input_scale) / - ((1 << data->left_shift) * static_cast(output->params.scale)); + static_cast(twice_max_input_scale / + ((1 << data->left_shift) * output->params.scale)); QuantizeMultiplierSmallerThanOneExp( real_input1_multiplier, &data->input1_multiplier, &data->input1_shift); diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/svdf_common.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/svdf_common.cc index ed74358bc..fb92b4fd7 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/svdf_common.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/svdf_common.cc @@ -451,21 +451,19 @@ TfLiteStatus PrepareSvdf(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8); - const double effective_scale_1 = - static_cast(input->params.scale) * - static_cast(weights_feature->params.scale) / - static_cast(activation_state->params.scale); + const double effective_scale_1 = static_cast( + input->params.scale * weights_feature->params.scale / + activation_state->params.scale); const double effective_scale_2 = - static_cast(activation_state->params.scale) * - static_cast(weights_time->params.scale) / - static_cast(output->params.scale); + static_cast(activation_state->params.scale * + weights_time->params.scale / output->params.scale); // TODO(b/162018098): Use TF_LITE_ENSURE_NEAR when it is ready. 
   TF_LITE_ENSURE(
       context,
       std::abs(static_cast<double>(bias->params.scale) -
-               (static_cast<double>(activation_state->params.scale) *
-                static_cast<double>(weights_time->params.scale))) < 1e-5);
+               static_cast<double>(activation_state->params.scale *
+                                   weights_time->params.scale)) < 1e-5);
 
   QuantizeMultiplier(effective_scale_1, &(data->effective_scale_1_a),
                      &(data->effective_scale_1_b));
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/BUILD b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/BUILD
index e7187ef0d..0698846ff 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/BUILD
+++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/BUILD
@@ -15,3 +15,16 @@ cc_library(
     hdrs = ["conv_test_data.h"],
     deps = ["//tensorflow/lite/c:common"],
 )
+
+cc_library(
+    name = "lstm_test_data",
+    srcs = ["lstm_test_data.cc"],
+    hdrs = [
+        "lstm_test_data.h",
+    ],
+    deps = [
+        "//tensorflow/lite/c:common",
+        "//tensorflow/lite/micro:test_helpers",
+        "//tensorflow/lite/micro/kernels:lstm_shared",
+    ],
+)
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.cc
new file mode 100644
index 000000000..4d7d9d9ed
--- /dev/null
+++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.cc
@@ -0,0 +1,309 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/micro/kernels/testdata/lstm_test_data.h"
+
+#include <cstring>
+
+namespace tflite {
+namespace testing {
+
+namespace {
+// LSTM internal setting (e.g., nonlinear activation type)
+// Only UnidirectionalLSTM is supported now
+constexpr TfLiteUnidirectionalSequenceLSTMParams kDefaultBuiltinData = {
+    /*.activation=*/kTfLiteActTanh,
+    /*.cell_clip=*/6,
+    /*.proj_clip=*/3,
+    /*.time_major=*/false,
+    /*.asymmetric_quantize_inputs=*/true,
+    /*diagonal_recurrent_tensors=*/false};
+}  // namespace
+
+GateOutputCheckData<4, 4> Get2X2GateOutputCheckData() {
+  GateOutputCheckData<4, 4> gate_data;
+  const float input_data[4] = {
+      0.2, 0.3,    // batch1
+      -0.98, 0.62  // batch2
+  };
+  std::memcpy(gate_data.input_data, input_data, 4 * sizeof(float));
+
+  const float hidden_state[4] = {
+      -0.1, 0.2,  // batch1
+      -0.3, 0.5   // batch2
+  };
+  std::memcpy(gate_data.hidden_state, hidden_state, 4 * sizeof(float));
+
+  const float cell_state[4] = {
+      -1.3, 6.2,  // batch1
+      -7.3, 3.5   // batch2
+  };
+  std::memcpy(gate_data.cell_state, cell_state, 4 * sizeof(float));
+
+  // Use the forget gate parameters to test small gate outputs
+  // output = sigmoid(W_i*i+W_h*h+b) = sigmoid([[-10,-10],[-20,-20]][0.2, 0.3]
+  // +[[-10,-10],[-20,-20]][-0.1, 0.2]+[1,2]) = sigmoid([-5,-10]) =
+  // [6.69285092e-03, 4.53978687e-05] (Batch1)
+  // Similarly, we have [0.93086158 0.9945137 ] for batch 2
+  const float expected_forget_gate_output[4] = {6.69285092e-3f, 4.53978687e-5f,
+                                                0.93086158, 0.9945137};
+  std::memcpy(gate_data.expected_forget_gate_output,
+              expected_forget_gate_output, 4 * sizeof(float));
+
+  // Use the input gate parameters to test small gate outputs
+  // output = sigmoid(W_i*i+W_h*h+b) = sigmoid([[10,10],[20,20]][0.2, 0.3]
+  // +[[10,10],[20,20]][-0.1, 0.2]+[-1,-2]) = sigmoid([5,10]) =
+  // [0.99330715, 0.9999546]
+  // Similarly, we have [0.06913842 0.0054863 ] for batch 2
+  const float expected_input_gate_output[4] = {0.99330715, 0.9999546,
+                                               0.06913842, 0.0054863};
+  std::memcpy(gate_data.expected_input_gate_output, expected_input_gate_output,
+              4 * sizeof(float));
+
+  // Use the output gate parameters to test normal gate outputs
+  // output = sigmoid(W_i*i+W_h*h+b) = sigmoid([[1,1],[1,1]][0.2, 0.3]
+  // +[[1,1],[1,1]][-0.1, 0.2]+[0,0]) = sigmoid([0.6,0.6]) =
+  // [0.6456563062257954, 0.6456563062257954]
+  // Similarly, we have [[0.46008512 0.46008512]] for batch 2
+  const float expected_output_gate_output[4] = {
+      0.6456563062257954, 0.6456563062257954, 0.46008512, 0.46008512};
+  std::memcpy(gate_data.expected_output_gate_output,
+              expected_output_gate_output, 4 * sizeof(float));
+
+  // Use the cell (modulation) gate parameters to test tanh outputs
+  // output = tanh(W_i*i+W_h*h+b) = tanh([[1,1],[1,1]][0.2, 0.3]
+  // +[[1,1],[1,1]][-0.1, 0.2]+[0,0]) = tanh([0.6,0.6]) =
+  // [0.5370495669980353, 0.5370495669980353]
+  // Similarly, we have [-0.1586485 -0.1586485] for batch 2
+  const float expected_cell_gate_output[4] = {
+      0.5370495669980353, 0.5370495669980353, -0.1586485, -0.1586485};
+  std::memcpy(gate_data.expected_cell_gate_output, expected_cell_gate_output,
+              4 * sizeof(float));
+
+  // Cell = forget_gate*cell + input_gate*cell_gate
+  // Note -6.80625824 is clipped to -6
+  const float expected_updated_cell[4] = {0.52475447, 0.53730665, -6,
+                                          3.47992756};
+  std::memcpy(gate_data.expected_updated_cell, expected_updated_cell,
+              4 * sizeof(float));
+
+  // Use the updated cell state to update the hidden state
+  // tanh(expected_updated_cell) * expected_output_gate_output
+  const float expected_updated_hidden[4] = {0.31079388, 0.3169827, -0.46007947,
+                                            0.45921249};
+  std::memcpy(gate_data.expected_updated_hidden, expected_updated_hidden,
+              4 * sizeof(float));
+  return gate_data;
+}
+
+// TODO(b/253466487): document how the golden values are arrived at
+LstmEvalCheckData<12, 4, 12> Get2X2LstmEvalCheckData() {
+  LstmEvalCheckData<12, 4, 12> eval_data;
+  const float input_data[12] = {
+      0.2, 0.3, 0.2, 0.3, 0.2, 0.3,         // batch one
+      -0.98, 0.62, 0.01, 0.99, 0.49, -0.32  // batch two
+  };
+  std::memcpy(eval_data.input_data, input_data, 12 * sizeof(float));
+
+  // Initialize hidden state as zeros
+  const float hidden_state[4] = {};
+  std::memcpy(eval_data.hidden_state, hidden_state, 4 * sizeof(float));
+
+  // The expected model output after 3 time steps using the fixed input and
+  // parameters
+  const float expected_output[12] = {
+      0.26455893, 0.26870455, 0.47935803,
+      0.47937014, 0.58013272, 0.58013278,  // batch1
+      -1.41184672e-3f, -1.43329117e-5f, 0.46887168,
+      0.46891281, 0.50054074, 0.50054148  // batch2
+  };
+  std::memcpy(eval_data.expected_output, expected_output, 12 * sizeof(float));
+
+  const float expected_hidden_state[4] = {
+      0.58013272, 0.58013278,  // batch1
+      0.50054074, 0.50054148   // batch2
+  };
+  std::memcpy(eval_data.expected_hidden_state, expected_hidden_state,
+              4 * sizeof(float));
+
+  const float expected_cell_state[4] = {
+      0.89740515, 0.8974053,  // batch1
+      0.80327607, 0.80327785  // batch2
+  };
+  std::memcpy(eval_data.expected_cell_state, expected_cell_state,
+              4 * sizeof(float));
+  return eval_data;
+}
+
+LstmNodeContent<float, float, float, float, 2, 3, 2, 2>
+Create2x3x2X2FloatNodeContents(const float* input_data,
+                               const float* hidden_state_data,
+                               const float* cell_state_data) {
+  // Parameters for different gates
+  // negative large weights for forget gate to make it really forget
+  const GateData<float, float, 2, 2> forget_gate_data = {
+      /*.activation_weight=*/{-10, -10, -20, -20},
+      /*.recurrent_weight=*/{-10, -10, -20, -20},
+      /*.fused_bias=*/{1, 2},
+      /*activation_zp_folded_bias=*/{0, 0},
+      /*recurrent_zp_folded_bias=*/{0, 0}};
+  // positive large weights for input gate to make it really remember
+  const GateData<float, float, 2, 2> input_gate_data = {
+      /*.activation_weight=*/{10, 10, 20, 20},
+      /*.recurrent_weight=*/{10, 10, 20, 20},
+      /*.fused_bias=*/{-1, -2},
+      /*activation_zp_folded_bias=*/{0, 0},
+      /*recurrent_zp_folded_bias=*/{0, 0}};
+  // all ones to test the behavior of tanh at normal range (-1,1)
+  const GateData<float, float, 2, 2> cell_gate_data = {
+      /*.activation_weight=*/{1, 1, 1, 1},
+      /*.recurrent_weight=*/{1, 1, 1, 1},
+      /*.fused_bias=*/{0, 0},
+      /*activation_zp_folded_bias=*/{0, 0},
+      /*recurrent_zp_folded_bias=*/{0, 0}};
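The gate goldens hard-coded above can be reproduced by hand from the comments. A standalone check of the batch-1 forget-gate outputs, with the weights, inputs and bias copied from this test data (the program below is illustrative and not part of the patch):

#include <cmath>
#include <cstdio>

int main() {
  const float x[2] = {0.2f, 0.3f};                  // batch-1 input
  const float h[2] = {-0.1f, 0.2f};                 // batch-1 hidden state
  const float Wi[2][2] = {{-10, -10}, {-20, -20}};  // forget gate input weights
  const float Wh[2][2] = {{-10, -10}, {-20, -20}};  // forget gate recurrent weights
  const float b[2] = {1, 2};                        // forget gate bias
  for (int r = 0; r < 2; ++r) {
    const float z = Wi[r][0] * x[0] + Wi[r][1] * x[1] +
                    Wh[r][0] * h[0] + Wh[r][1] * h[1] + b[r];
    // Prints 6.69285092e-03 and 4.53978687e-05, matching the goldens.
    std::printf("forget_gate[%d] = %.8e\n", r, 1.0 / (1.0 + std::exp(-z)));
  }
  return 0;
}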
+  // all ones to test the behavior of sigmoid at normal range (-1, 1)
+  const GateData<float, float, 2, 2> output_gate_data = {
+      /*.activation_weight=*/{1, 1, 1, 1},
+      /*.recurrent_weight=*/{1, 1, 1, 1},
+      /*.fused_bias=*/{0, 0},
+      /*activation_zp_folded_bias=*/{0, 0},
+      /*recurrent_zp_folded_bias=*/{0, 0}};
+
+  LstmNodeContent<float, float, float, float, 2, 3, 2, 2> float_node_contents(
+      kDefaultBuiltinData, forget_gate_data, input_gate_data, cell_gate_data,
+      output_gate_data);
+
+  if (input_data != nullptr) {
+    float_node_contents.SetInputData(input_data);
+  }
+  if (hidden_state_data != nullptr) {
+    float_node_contents.SetHiddenStateData(hidden_state_data);
+  }
+  if (cell_state_data != nullptr) {
+    float_node_contents.SetCellStateData(cell_state_data);
+  }
+  return float_node_contents;
+}
+
+NodeQuantizationParameters Get2X2Int8LstmQuantizationSettings() {
+  NodeQuantizationParameters quantization_settings;
+  quantization_settings.activation_type = kTfLiteInt8;
+  quantization_settings.weight_type = kTfLiteInt8;
+  quantization_settings.cell_type = kTfLiteInt16;
+  quantization_settings.bias_type = kTfLiteInt32;
+  quantization_settings.nonlinear_activation_input_scale =
+      0.00024414062;  // std::pow(2.0f, -12.0f)
+  quantization_settings.nonlinear_activation_output_scale =
+      0.00003051757;  // std::pow(2.0f, -15.0f)
+
+  // state quantization parameters
+  quantization_settings.input = {/*scale=*/0.00784313725490196, /*zp=*/0,
+                                 /*symmetry=*/false};
+  quantization_settings.output = {/*scale=*/0.004705882165580988, /*zp=*/-21,
+                                  /*symmetry=*/false};
+  quantization_settings.hidden_state = {/*scale=*/0.004705882165580988,
+                                        /*zp=*/-21, /*symmetry=*/false};
+  quantization_settings.cell_state = {/*scale=*/0.00024414062, /*zp=*/0,
+                                      /*symmetry=*/true};
+
+  // gate quantization parameters
+  quantization_settings.forget_gate = {
+      {/*scale=*/0.15748031496062992, /*zp=*/0, /*symmetry=*/true},
+      {/*scale=*/0.15748031496062992, /*zp=*/0, /*symmetry=*/true},
+      {/*scale=*/0.0012351397251814111, /*zp=*/0, /*symmetry=*/true}};
+  quantization_settings.input_gate = {
+      {/*scale=*/0.15748031496062992, /*zp=*/0, /*symmetry=*/true},
+      {/*scale=*/0.15748031496062992, /*zp=*/0, /*symmetry=*/true},
+      {/*scale=*/0.0012351397251814111, /*zp=*/0, /*symmetry=*/true}};
+  quantization_settings.cell_gate = {
+      {/*scale=*/0.007874015748031496, /*zp=*/0, /*symmetry=*/true},
+      {/*scale=*/0.007874015748031496, /*zp=*/0, /*symmetry=*/true},
+      {/*scale=*/6.175698625907056e-5, /*zp=*/0, /*symmetry=*/true}};
+  quantization_settings.output_gate = {
+      {/*scale=*/0.007874015748031496, /*zp=*/0, /*symmetry=*/true},
+      {/*scale=*/0.007874015748031496, /*zp=*/0, /*symmetry=*/true},
+      {/*scale=*/6.175698625907056e-5, /*zp=*/0, /*symmetry=*/true}};
+  return quantization_settings;
+}
+
+NodeQuantizationParameters Get2X2Int16LstmQuantizationSettings() {
+  NodeQuantizationParameters quantization_settings;
+  quantization_settings.activation_type = kTfLiteInt16;
+  quantization_settings.weight_type = kTfLiteInt8;
+  quantization_settings.cell_type = kTfLiteInt16;
+  quantization_settings.bias_type = kTfLiteInt64;
+  quantization_settings.nonlinear_activation_input_scale =
+      0.00024414062;  // std::pow(2.0f, -12.0f)
+  quantization_settings.nonlinear_activation_output_scale =
+      0.00003051757;  // std::pow(2.0f, -15.0f)
+
+  // state quantization parameters
+  quantization_settings.input = {/*scale=*/3.0518044e-5, /*zp=*/0,
+                                 /*symmetry=*/false};
+  quantization_settings.output = {/*scale=*/1.8310826e-5, /*zp=*/-5461,
+                                  /*symmetry=*/false};
+  quantization_settings.hidden_state = {/*scale=*/1.8310826e-5, /*zp=*/-5461,
+                                        /*symmetry=*/false};
+  quantization_settings.cell_state = {/*scale=*/0.00024414062, /*zp=*/0,
+                                      /*symmetry=*/true};
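The hand-written constants in these quantization settings follow directly from the test weights, assuming symmetric quantization (scale = max|w| / 127) for int8 weights and power-of-two scales for the cell state and the fixed-point nonlinearities. A quick standalone check of the values used above:

#include <cmath>
#include <cstdio>

int main() {
  // Forget/input gate weights span [-20, 20] -> scale = 20/127.
  std::printf("%.17f\n", 20.0 / 127.0);  // 0.15748031496062992
  // Cell/output gate weights span [-1, 1] -> scale = 1/127.
  std::printf("%.18f\n", 1.0 / 127.0);  // 0.007874015748031496
  // Cell state and nonlinear-activation input use 2^-12.
  std::printf("%.11f\n", std::pow(2.0, -12.0));  // 0.00024414062...
  return 0;
}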
+
+  // gate quantization parameters
+  quantization_settings.forget_gate = {
+      {/*scale=*/0.15748031496062992, /*zp=*/0, /*symmetry=*/true},
+      {/*scale=*/0.15748031496062992, /*zp=*/0, /*symmetry=*/true},
+      {/*scale=*/4.8059911474468205e-06, /*zp=*/0, /*symmetry=*/true}};
+  quantization_settings.input_gate = {
+      {/*scale=*/0.15748031496062992, /*zp=*/0, /*symmetry=*/true},
+      {/*scale=*/0.15748031496062992, /*zp=*/0, /*symmetry=*/true},
+      {/*scale=*/4.8059911474468205e-06, /*zp=*/0, /*symmetry=*/true}};
+  quantization_settings.cell_gate = {
+      {/*scale=*/0.007874015748031496, /*zp=*/0, /*symmetry=*/true},
+      {/*scale=*/0.007874015748031496, /*zp=*/0, /*symmetry=*/true},
+      {/*scale=*/2.40299557372341e-07, /*zp=*/0, /*symmetry=*/true}};
+  quantization_settings.output_gate = {
+      {/*scale=*/0.007874015748031496, /*zp=*/0, /*symmetry=*/true},
+      {/*scale=*/0.007874015748031496, /*zp=*/0, /*symmetry=*/true},
+      {/*scale=*/2.40299557372341e-07, /*zp=*/0, /*symmetry=*/true}};
+  return quantization_settings;
+}
+
+LstmNodeContent<int8_t, int8_t, int32_t, int16_t, 2, 3, 2, 2>
+Create2x3x2X2Int8NodeContents(const float* input_data,
+                              const float* hidden_state,
+                              const float* cell_state) {
+  auto float_node_content =
+      Create2x3x2X2FloatNodeContents(input_data, hidden_state, cell_state);
+  const auto quantization_settings = Get2X2Int8LstmQuantizationSettings();
+  return CreateIntegerNodeContents<int8_t, int8_t, int32_t, int16_t>(
+      quantization_settings,
+      /*fold_zero_point=*/true, float_node_content);
+}
+
+LstmNodeContent<int16_t, int8_t, int64_t, int16_t, 2, 3, 2, 2>
+Create2x3x2X2Int16NodeContents(const float* input_data,
+                               const float* hidden_state,
+                               const float* cell_state) {
+  auto float_node_content =
+      Create2x3x2X2FloatNodeContents(input_data, hidden_state, cell_state);
+  const auto quantization_settings = Get2X2Int16LstmQuantizationSettings();
+  return CreateIntegerNodeContents<int16_t, int8_t, int64_t, int16_t>(
+      quantization_settings,
+      /*fold_zero_point=*/false, float_node_content);
+}
+
+}  // namespace testing
+}  // namespace tflite
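Taken together, the factories above are intended to be used along these lines in a kernel test. This is a sketch only: the include path matches this patch, but how the contents are wired into the kernel runner is defined by lstm_eval_test.h and omitted here.

#include "tensorflow/lite/micro/kernels/testdata/lstm_test_data.h"

namespace {

void SketchUsage() {
  // Float "source of truth" model contents, states initialized to zero.
  auto float_contents = tflite::testing::Create2x3x2X2FloatNodeContents();
  // Same model with int8 activations/weights, zero point folded into bias.
  auto int8_contents = tflite::testing::Create2x3x2X2Int8NodeContents();
  // Goldens for a 3-step evaluation of the 2x2 model.
  const auto eval_data = tflite::testing::Get2X2LstmEvalCheckData();
  (void)float_contents;
  (void)int8_contents;
  (void)eval_data;
}

}  // namespace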
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.h
new file mode 100644
index 000000000..3edf4200a
--- /dev/null
+++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.h
@@ -0,0 +1,579 @@
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_KERNELS_TESTDATA_LSTM_TEST_DATA_H_
+#define TENSORFLOW_LITE_MICRO_KERNELS_TESTDATA_LSTM_TEST_DATA_H_
+#include <algorithm>
+
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h"
+#include "tensorflow/lite/micro/kernels/lstm_shared.h"
+#include "tensorflow/lite/micro/test_helpers.h"
+
+namespace tflite {
+namespace testing {
+// Data structure to store all the data used to check output of internal gates
+// of one time step
+// input_size = batch_size*input_dimension (size of the input array)
+// gate_output_size = batch_size*state_dimension (size of the gate output)
+template <int input_size, int gate_output_size>
+struct GateOutputCheckData {
+  float input_data[input_size];
+  float hidden_state[gate_output_size];
+  float cell_state[gate_output_size];
+  float expected_forget_gate_output[gate_output_size];
+  float expected_input_gate_output[gate_output_size];
+  float expected_output_gate_output[gate_output_size];
+  float expected_cell_gate_output[gate_output_size];
+  float expected_updated_cell[gate_output_size];
+  float expected_updated_hidden[gate_output_size];
+};
+
+// Data structure to store all the data used to check the output of the kernel
+// of multiple batch, multiple timesteps
+// input_size = batch_size*time_steps*input_dimension (size of the input array)
+// gate_output_size = batch_size*state_dimension (size of the gate output)
+// output_size = time_steps*gate_output_size (size of the output from the
+// kernel)
+template <int input_size, int gate_output_size, int output_size>
+struct LstmEvalCheckData {
+  float input_data[input_size];
+  float hidden_state[gate_output_size];
+  float expected_output[output_size];
+  float expected_hidden_state[gate_output_size];
+  float expected_cell_state[gate_output_size];
+};
+
+// Struct that holds the weight/bias information for a standard gate (i.e. no
+// modification such as layer normalization, peephole, etc.)
+// Every gate is defined by the type and size of the weights (bias included)
+// inside.
+// Specifically, types are weight type and bias type (normally the same
+// type of MatMul accumulator).
+// activation_weight has shape (hidden state dimension * input tensor dimension)
+// recurrent_weight has shape (hidden state dimension * hidden state dimension)
+// bias has shape (hidden state dimension, 1)
+template <typename WeightType, typename BiasType, int input_dimension,
+          int state_dimension>
+struct GateData {
+  WeightType activation_weight[state_dimension * input_dimension];
+  WeightType recurrent_weight[state_dimension * state_dimension];
+  BiasType fused_bias[state_dimension];
+  // Quantized model folded the zero point of activations into biases:
+  // bias + zero_point * weight.
+  // Note: folded bias is only required for the legacy 8x8->16 pass.
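The folding mentioned in the comment above works because, with an asymmetric activation q = r/s + z, the accumulation sum(w * (q - z)) + b equals sum(w * q) + (b - z * sum(w)); the second term is a constant, so it can be merged into the bias once at prepare time. A standalone numeric sketch with illustrative values:

#include <cstdint>
#include <cstdio>

int main() {
  const int8_t w[2][2] = {{10, 10}, {20, 20}};  // one gate's weights
  const int32_t b[2] = {-1, -2};                // fused bias
  const int32_t zp = -21;                       // activation zero point
  int32_t folded[2];
  for (int r = 0; r < 2; ++r) {
    int32_t row_sum = 0;
    for (int c = 0; c < 2; ++c) row_sum += w[r][c];
    folded[r] = b[r] - zp * row_sum;  // constant, computed once
  }
  std::printf("%d %d\n", folded[0], folded[1]);  // prints 419 838
  return 0;
}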
+  // Therefore the data type is fixed here to avoid compilation errors (the
+  // computation of folding does not support other types)
+  int32_t activation_zp_folded_bias[state_dimension];
+  int32_t recurrent_zp_folded_bias[state_dimension];
+};
+
+// A struct that holds quantization parameters for an LSTM tensor
+struct TensorQuantizationParameters {
+  double scale;
+  int zero_point;
+  bool symmetry;
+};
+
+// A struct that holds quantization parameters for an internal gate, which is
+// defined by activation/recurrent weight and bias (assuming no internal layer
+// normalization)
+struct GateQuantizationParameters {
+  TensorQuantizationParameters activation_weight;
+  TensorQuantizationParameters recurrent_weight;
+  TensorQuantizationParameters bias;
+};
+
+// A struct that holds the quantization settings for the LSTM node. Data
+// members can be grouped into five parts.
+// 1. Data types (activation, weight, cell, bias)
+// 2. Non-linear activation (i.e., tanh and sigmoid) fixed point
+//    calculation settings
+// 3. Input/output tensor quantization settings
+// 4. Internal state (hidden and cell) quantization settings
+// 5. Internal gate (forget, input, cell, output) settings
+struct NodeQuantizationParameters {
+  TfLiteType activation_type;
+  TfLiteType weight_type;
+  TfLiteType cell_type;
+  TfLiteType bias_type;
+  // Fixed point setting for integer nonlinear activation calculation
+  double nonlinear_activation_input_scale;
+  double nonlinear_activation_output_scale;
+  // Quantization parameters for input/output
+  TensorQuantizationParameters input;
+  TensorQuantizationParameters output;
+  // Quantization parameters for internal states
+  TensorQuantizationParameters hidden_state;
+  TensorQuantizationParameters cell_state;
+  // Quantization parameters for gates
+  GateQuantizationParameters forget_gate;
+  GateQuantizationParameters input_gate;
+  GateQuantizationParameters cell_gate;
+  GateQuantizationParameters output_gate;
+};
+
+// Data structure that holds all the information to evaluate an LSTM kernel
+// (mimics the LSTM node).
+// Tensor Types:
+// ActivationType defines the data type of input/output of the layer. The hidden
+// state has the ActivationType as well since it is the layer output of the
+// previous time step.
+// WeightType defines the weight data type inside the internal gates.
+// BiasType defines the bias data type inside the internal gates (normally the
+// same type as the MatMul accumulator).
+// Tensor Shapes:
+// The input to the layer has shape (batch_size,time_steps,input_dimension).
+// Both the hidden state and cell state have shape (state_dimension, 1)
+// The output of the layer has shape (batch_size,time_steps,state_dimension)
+// Note: state values can change through calls (stateful)
+template <typename ActivationType, typename WeightType, typename BiasType,
+          typename CellType, int batch_size, int time_steps,
+          int input_dimension, int state_dimension>
+class LstmNodeContent {
+ public:
+  LstmNodeContent(const LstmNodeContent& other) = default;
+  LstmNodeContent& operator=(const LstmNodeContent& other) = default;
+  // Use the general model setting (builtin data) and the four gates data to
+  // construct the node content.
Note the input, hidden state, and cell state + // data is provided later for flexible testing (initialize as zero now) + LstmNodeContent( + const TfLiteUnidirectionalSequenceLSTMParams builtin_data, + const GateData + forget_gate_params, + const GateData + input_gate_params, + const GateData + cell_gate_params, + const GateData + output_gate_params) + : builtin_data_(builtin_data), + forget_gate_data_(forget_gate_params), + input_gate_data_(input_gate_params), + cell_gate_data_(cell_gate_params), + output_gate_data_(output_gate_params) { + InitializeTensors(); + } + + // Add quantization parameters (scale, zero point) to tensors + // Only required for the integer kernel + void AddQuantizationParameters( + const NodeQuantizationParameters& quantization_params) { + quantization_settings_ = quantization_params; + // Input Tensor + SetTensorQuantizationParam(kLstmInputTensor, quantization_params.input); + // Forget Gate Tensors + const auto& forget_gate_quant_param = quantization_params.forget_gate; + SetTensorQuantizationParam(kLstmInputToForgetWeightsTensor, + forget_gate_quant_param.activation_weight); + SetTensorQuantizationParam(kLstmRecurrentToForgetWeightsTensor, + forget_gate_quant_param.recurrent_weight); + SetTensorQuantizationParam(kLstmForgetGateBiasTensor, + forget_gate_quant_param.bias); + // Input Gate Tensors + const auto& input_gate_quant_param = quantization_params.input_gate; + SetTensorQuantizationParam(kLstmInputToInputWeightsTensor, + input_gate_quant_param.activation_weight); + SetTensorQuantizationParam(kLstmRecurrentToInputWeightsTensor, + input_gate_quant_param.recurrent_weight); + SetTensorQuantizationParam(kLstmInputGateBiasTensor, + input_gate_quant_param.bias); + // Cell Gate Tensors + const auto& cell_gate_quant_param = quantization_params.cell_gate; + SetTensorQuantizationParam(kLstmInputToCellWeightsTensor, + cell_gate_quant_param.activation_weight); + SetTensorQuantizationParam(kLstmRecurrentToCellWeightsTensor, + cell_gate_quant_param.recurrent_weight); + SetTensorQuantizationParam(kLstmCellGateBiasTensor, + cell_gate_quant_param.bias); + // Output Gate Tensors + const auto& output_gate_quant_param = quantization_params.output_gate; + SetTensorQuantizationParam(kLstmInputToOutputWeightsTensor, + output_gate_quant_param.activation_weight); + SetTensorQuantizationParam(kLstmRecurrentToOutputWeightsTensor, + output_gate_quant_param.recurrent_weight); + SetTensorQuantizationParam(kLstmOutputGateBiasTensor, + output_gate_quant_param.bias); + // State Tensors + SetTensorQuantizationParam(kLstmOutputStateTensor, + quantization_params.hidden_state); + SetTensorQuantizationParam(kLstmCellStateTensor, + quantization_params.cell_state); + // Output Tensor + SetTensorQuantizationParam(24, quantization_params.output); + } + + // Provide interface to set the input tensor values for flexible testing + void SetInputData(const ActivationType* data) { + std::memcpy( + input_, data, + batch_size * input_dimension * time_steps * sizeof(ActivationType)); + SetTensor(kLstmInputTensor, input_, input_size_); + } + const ActivationType* GetInputData() const { return input_; } + + // Provide interface to set the hidden state tensor values for flexible + // testing + void SetHiddenStateData(const ActivationType* data) { + std::memcpy(hidden_state_, data, + batch_size * state_dimension * sizeof(ActivationType)); + } + ActivationType* GetHiddenStateData() { return hidden_state_; } + + // Provide interface to set the cell state tensor values for flexible + // testing + void 
SetCellStateData(const CellType* data) {
+    std::memcpy(cell_state_, data,
+                batch_size * state_dimension * sizeof(CellType));
+  }
+  CellType* GetCellStateData() { return cell_state_; }
+  ActivationType* GetOutputData() { return output_; }
+
+  // Internal tensors, see lstm_shared.h for tensor names
+  TfLiteEvalTensor* GetEvalTensor(const int tensor_index) {
+    auto valid_index = input_tensor_indices_[tensor_index + 1];
+    if (valid_index < 0) {
+      return nullptr;
+    }
+    return &eval_tensors_[tensor_index];
+  }
+
+  TfLiteTensor* GetTensors() { return tensors_; }
+
+  // Required by the kernel runner
+  TfLiteIntArray* KernelInputs() {
+    return IntArrayFromInts(input_tensor_indices_);
+  }
+  // Required by the kernel runner
+  TfLiteIntArray* KernelOutputs() {
+    return IntArrayFromInts(output_tensor_indices_);
+  }
+
+  // Variable tensors (will be changed, can not be const)
+  TfLiteEvalTensor* HiddenStateEvalTensor() {
+    return &eval_tensors_[kLstmOutputStateTensor];
+  }
+  TfLiteEvalTensor* CellStateEvalTensor() {
+    return &eval_tensors_[kLstmCellStateTensor];
+  }
+  TfLiteEvalTensor* OutputEvalTensor() { return &eval_tensors_[24]; }
+
+  const GateData<WeightType, BiasType, input_dimension, state_dimension>&
+  ForgetGateData() const {
+    return forget_gate_data_;
+  }
+  const GateData<WeightType, BiasType, input_dimension, state_dimension>&
+  InputGateData() const {
+    return input_gate_data_;
+  }
+  const GateData<WeightType, BiasType, input_dimension, state_dimension>&
+  CellGateData() const {
+    return cell_gate_data_;
+  }
+  const GateData<WeightType, BiasType, input_dimension, state_dimension>&
+  OutputGateData() const {
+    return output_gate_data_;
+  }
+
+  const TfLiteUnidirectionalSequenceLSTMParams& BuiltinData() const {
+    return builtin_data_;
+  }
+
+  const NodeQuantizationParameters& QuantizationSettings() const {
+    return quantization_settings_;
+  }
+
+ private:
+  void InitializeTensors() {
+    // Invalidate all the input tensors until we set them
+    input_tensor_indices_[0] = 24;  // total elements
+    for (size_t i = 1; i < 25; i++) {
+      input_tensor_indices_[i] = kTfLiteOptionalTensor;
+    }
+    // Input Tensor
+    SetTensor(kLstmInputTensor, input_, input_size_);
+    // Forget Gate Tensors
+    SetTensor(kLstmInputToForgetWeightsTensor,
+              forget_gate_data_.activation_weight, activation_weight_size_);
+    SetTensor(kLstmRecurrentToForgetWeightsTensor,
+              forget_gate_data_.recurrent_weight, recurrent_weight_size_);
+    SetTensor(kLstmForgetGateBiasTensor, forget_gate_data_.fused_bias,
+              bias_size_);
+    // Input Gate Tensors
+    SetTensor(kLstmInputToInputWeightsTensor,
+              input_gate_data_.activation_weight, activation_weight_size_);
+    SetTensor(kLstmRecurrentToInputWeightsTensor,
+              input_gate_data_.recurrent_weight, recurrent_weight_size_);
+    SetTensor(kLstmInputGateBiasTensor, input_gate_data_.fused_bias,
+              bias_size_);
+    // Cell Gate Tensors
+    SetTensor(kLstmInputToCellWeightsTensor, cell_gate_data_.activation_weight,
+              activation_weight_size_);
+    SetTensor(kLstmRecurrentToCellWeightsTensor,
+              cell_gate_data_.recurrent_weight, recurrent_weight_size_);
+    SetTensor(kLstmCellGateBiasTensor, cell_gate_data_.fused_bias, bias_size_);
+    // Output Gate Tensors
+    SetTensor(kLstmInputToOutputWeightsTensor,
+              output_gate_data_.activation_weight, activation_weight_size_);
+    SetTensor(kLstmRecurrentToOutputWeightsTensor,
+              output_gate_data_.recurrent_weight, recurrent_weight_size_);
+    SetTensor(kLstmOutputGateBiasTensor, output_gate_data_.fused_bias,
+              bias_size_);
+    // State Tensors
+    SetTensor(kLstmOutputStateTensor, hidden_state_, state_size_,
+              /*is_variable=*/true);
+    SetTensor(kLstmCellStateTensor, cell_state_, state_size_,
+              /*is_variable=*/true);
+    // Output Tensor
+    SetTensor(24, output_, output_size_, /*is_variable=*/true);
+  }
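The dims and index arrays that InitializeTensors() passes to SetTensor() follow the TFLM test-helper convention of size-prefixed int arrays: element 0 holds the count, and IntArrayFromInts() reinterprets the buffer as a TfLiteIntArray. A plain-C++ illustration of the layout, without the real helper:

#include <cstdio>

int main() {
  // Rank-3 tensor of shape 2 x 3 x 2: element 0 is the rank, payload follows.
  int input_size[4] = {3, 2, 3, 2};
  const int rank = input_size[0];
  for (int i = 1; i <= rank; ++i) {
    std::printf("dim%d = %d\n", i - 1, input_size[i]);
  }
  return 0;
}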
+
+  template <typename T>
+  void SetTensor(const int index, const T* data, int* dims,
+                 const bool is_variable = false) {
+    // Lite tensors for kernel level testing
+    tensors_[index].data.data = const_cast<T*>(data);
+    tensors_[index].dims = IntArrayFromInts(dims);
+    tensors_[index].type = typeToTfLiteType<T>();
+    tensors_[index].is_variable = is_variable;
+    // Eval tensors for internal computation testing
+    eval_tensors_[index].data.data = const_cast<T*>(data);
+    eval_tensors_[index].dims = IntArrayFromInts(dims);
+    eval_tensors_[index].type = typeToTfLiteType<T>();
+    // update the index
+    if (index < 24) {
+      input_tensor_indices_[index + 1] = index;
+    }
+  }
+
+  void SetTensorQuantizationParam(
+      const int index, const TensorQuantizationParameters& quant_param) {
+    tensors_[index].params.scale = quant_param.scale;
+    tensors_[index].params.zero_point = quant_param.zero_point;
+  }
+
+  const TfLiteUnidirectionalSequenceLSTMParams builtin_data_;
+  GateData<WeightType, BiasType, input_dimension, state_dimension>
+      forget_gate_data_;
+  GateData<WeightType, BiasType, input_dimension, state_dimension>
+      input_gate_data_;
+  GateData<WeightType, BiasType, input_dimension, state_dimension>
+      cell_gate_data_;
+  GateData<WeightType, BiasType, input_dimension, state_dimension>
+      output_gate_data_;
+
+  // Kept to ease the testing process (although all quantization information
+  // can be obtained from individual tensors, they are well organized here and
+  // lightweight)
+  NodeQuantizationParameters quantization_settings_;
+
+  // Not const since IntArrayFromInts takes int *; the first element of the
+  // array must be the size of the array
+  int input_size_[4] = {3, batch_size, time_steps, input_dimension};
+  int output_size_[4] = {3, batch_size, time_steps, state_dimension};
+  // weight tensor has C-style "row-major" memory ordering
+  int activation_weight_size_[3] = {2, state_dimension, input_dimension};
+  int recurrent_weight_size_[3] = {2, state_dimension, state_dimension};
+  int bias_size_[2] = {1, state_dimension};
+  int state_size_[3] = {2, batch_size, state_dimension};
+
+  // see lstm_shared.h for tensor names, the last tensor is the output tensor
+  TfLiteTensor tensors_[24 + 1];
+  // Used for internal kernel testing
+  TfLiteEvalTensor eval_tensors_[24 + 1];
+  // indices for the tensors inside the node (required by kernel runner)
+  int input_tensor_indices_[1 + 24] = {};
+  // single output (last in the tensors array)
+  int output_tensor_indices_[2] = {1, 24};
+
+  // tensor data
+  // states are initialized to zero
+  ActivationType hidden_state_[batch_size * state_dimension] = {};
+  CellType cell_state_[batch_size * state_dimension] = {};
+  // input is defined in the ModelContent (const across all derived models)
+  ActivationType input_[batch_size * input_dimension * time_steps] = {};
+  ActivationType output_[batch_size * state_dimension * time_steps] = {};
+};
+
+// Converts floating point gate parameters to the corresponding quantized
+// version
+template <typename WeightType, typename BiasType, int input_dimension,
+          int state_dimension>
+GateData<WeightType, BiasType, input_dimension, state_dimension>
+CreateQuantizedGateData(
+    const GateData<float, float, input_dimension, state_dimension>&
+        gate_parameters,
+    const TensorQuantizationParameters& input_quantization_params,
+    const TensorQuantizationParameters& output_quantization_params,
+    const GateQuantizationParameters& gate_quantization_params,
+    const bool fold_zero_point) {
+  GateData<WeightType, BiasType, input_dimension, state_dimension>
+      quantized_gate_params;
+  tflite::SymmetricQuantize(gate_parameters.activation_weight,
+                            quantized_gate_params.activation_weight,
+                            state_dimension * input_dimension,
+                            gate_quantization_params.activation_weight.scale);
+  tflite::SymmetricQuantize(gate_parameters.recurrent_weight,
+                            quantized_gate_params.recurrent_weight,
+                            state_dimension * state_dimension,
+                            gate_quantization_params.recurrent_weight.scale);
+  tflite::SymmetricQuantize(gate_parameters.fused_bias,
quantized_gate_params.fused_bias, state_dimension, + gate_quantization_params.bias.scale); + // Note: steps below are not required for the generalized LSTM evaluation + // (e.g., 16bits activation) + if (fold_zero_point) { + // Copy the bias values to prepare zero_point folded + // bias precomputation. bias has same scale as + // input_scale*input_weight_scale) + std::memcpy(quantized_gate_params.activation_zp_folded_bias, + quantized_gate_params.fused_bias, 2 * sizeof(int32_t)); + // Pre-calculate bias - zero_point * weight (a constant). + tflite::tensor_utils::MatrixScalarMultiplyAccumulate( + quantized_gate_params.activation_weight, + -1 * input_quantization_params.zero_point, 2, 2, + quantized_gate_params.activation_zp_folded_bias); + + // Initialize the folded bias to zeros for accumulation + for (size_t i = 0; i < 2; i++) { + quantized_gate_params.recurrent_zp_folded_bias[i] = 0; + } + // Calculate : -zero_point * weight since it is a constant + tflite::tensor_utils::MatrixScalarMultiplyAccumulate( + quantized_gate_params.recurrent_weight, + -1 * output_quantization_params.zero_point, 2, 2, + quantized_gate_params.recurrent_zp_folded_bias); + } + return quantized_gate_params; +} + +// Create integer LSTM node content from the float node contents and +// quantization settings +// Note: fold_zero_point folds the zero point into the bias (precomputation), +// which is not required for the generalized integer inference (16 bits act +// LSTM). +template +LstmNodeContent +CreateIntegerNodeContents( + const NodeQuantizationParameters& quantization_settings, + const bool fold_zero_point, + LstmNodeContent& float_node_contents) { + const auto quantized_forget_gate_data = + CreateQuantizedGateData( + float_node_contents.ForgetGateData(), quantization_settings.input, + quantization_settings.output, quantization_settings.forget_gate, + fold_zero_point); + const auto quantized_input_gate_data = + CreateQuantizedGateData( + float_node_contents.InputGateData(), quantization_settings.input, + quantization_settings.output, quantization_settings.input_gate, + fold_zero_point); + const auto quantized_cell_gate_data = + CreateQuantizedGateData( + float_node_contents.CellGateData(), quantization_settings.input, + quantization_settings.output, quantization_settings.cell_gate, + fold_zero_point); + const auto quantized_output_gate_params = + CreateQuantizedGateData( + float_node_contents.OutputGateData(), quantization_settings.input, + quantization_settings.output, quantization_settings.output_gate, + fold_zero_point); + LstmNodeContent + quantized_node_content( + float_node_contents.BuiltinData(), quantized_forget_gate_data, + quantized_input_gate_data, quantized_cell_gate_data, + quantized_output_gate_params); + + // Quantize the floating point input + ActivationType quantized_input[batch_size * input_dimension * time_steps] = + {}; + Quantize(float_node_contents.GetInputData(), quantized_input, + batch_size * input_dimension * time_steps, + quantization_settings.input.scale, + quantization_settings.input.zero_point); + quantized_node_content.SetInputData(quantized_input); + // Quantize the floating point hidden state + ActivationType quantized_hidden_state[batch_size * state_dimension] = {}; + Quantize(float_node_contents.GetHiddenStateData(), quantized_hidden_state, + batch_size * state_dimension, + quantization_settings.hidden_state.scale, + quantization_settings.hidden_state.zero_point); + quantized_node_content.SetHiddenStateData(quantized_hidden_state); + // Quantize the floating point cell state + 
CellType quantized_cell_state[batch_size * state_dimension] = {};
+  Quantize(float_node_contents.GetCellStateData(), quantized_cell_state,
+           batch_size * state_dimension, quantization_settings.cell_state.scale,
+           quantization_settings.cell_state.zero_point);
+  quantized_node_content.SetCellStateData(quantized_cell_state);
+
+  // Add scale and zero point to tensors
+  quantized_node_content.AddQuantizationParameters(quantization_settings);
+  return quantized_node_content;
+}
+
+// Get the gate output data (one time step) for a simple 2X2 model
+// batch_size = 2; time_steps = 1; input_dimension = 2; state_dimension = 2
+// input_size = batch_size*time_steps*input_dimension = 4
+// gate_output_size = batch_size*state_dimension = 4
+GateOutputCheckData<4, 4> Get2X2GateOutputCheckData();
+
+// Get the kernel output data for a simple 2X2 model
+// batch_size = 2; time_steps = 3; input_dimension = 2; state_dimension = 2
+// input_size = batch_size*time_steps*input_dimension = 12
+// gate_output_size = batch_size*state_dimension = 4
+// output_size = time_steps*gate_output_size = 12
+LstmEvalCheckData<12, 4, 12> Get2X2LstmEvalCheckData();
+
+// Create a 2x2 float node content
+// batch_size = 2; time_steps = 3; input_dimension = 2; state_dimension = 2
+LstmNodeContent<float, float, float, float, 2, 3, 2, 2>
+Create2x3x2X2FloatNodeContents(const float* input_data = nullptr,
+                               const float* hidden_state = nullptr,
+                               const float* cell_state = nullptr);
+
+// Get the quantization settings for the 2X2 model
+NodeQuantizationParameters Get2X2Int8LstmQuantizationSettings();
+
+// Create int8 (activation) x int8 (weight) -> int16 (cell) node
+// batch_size = 2; time_steps = 3; input_dimension = 2; state_dimension = 2
+// input is in float format since the source of truth is always the float
+// configuration
+LstmNodeContent<int8_t, int8_t, int32_t, int16_t, 2, 3, 2, 2>
+Create2x3x2X2Int8NodeContents(const float* input_data = nullptr,
+                              const float* hidden_state = nullptr,
+                              const float* cell_state = nullptr);
+
+// Create int16 (activation) x int8 (weight) -> int16 (cell) node
+// batch_size = 2; time_steps = 3; input_dimension = 2; state_dimension = 2
+// input is in float format since the source of truth is always the float
+// configuration
+LstmNodeContent<int16_t, int8_t, int64_t, int16_t, 2, 3, 2, 2>
+Create2x3x2X2Int16NodeContents(const float* input_data = nullptr,
+                               const float* hidden_state = nullptr,
+                               const float* cell_state = nullptr);
+
+}  // namespace testing
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_KERNELS_TESTDATA_LSTM_TEST_DATA_H_
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.cc
index f8b231349..e671abec5 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.cc
+++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.cc
@@ -1,4 +1,4 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -13,1367 +13,570 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include
-#include
+// Integer version of unidirectional sequence lstm. Only the standard LSTM
+// (defined in the keras LSTM layer, e.g., no peephole etc.) is supported here.
+// Currently used by the 16 bits activation case only + +#include +#include -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" #include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/fully_connected.h" #include "tensorflow/lite/micro/kernels/kernel_util.h" #include "tensorflow/lite/micro/kernels/lstm_eval.h" #include "tensorflow/lite/micro/kernels/lstm_shared.h" -#include "tensorflow/lite/micro/kernels/micro_tensor_utils.h" -#include "tensorflow/lite/micro/micro_log.h" namespace tflite { namespace { - -constexpr int scratch_index_size = 12; - -struct UnidirectionalSequenceLstmOpData { - // If the lstm is layer norm. - bool use_layer_norm; - // The scratch index. - int scratch_index[scratch_index_size]; - - int32_t row_sums_size; - int32_t* row_sums; - bool compute_row_sums = false; - - int32_t input_zero_point; - int32_t output_state_zero_point; - - IntegerLstmParameter integer_lstm_param; -}; - -TfLiteStatus PopulateQuantizedLstmParams8x8_16( - TfLiteContext* context, TfLiteNode* node, - IntegerLstmParameter* integer_lstm_param) { - MicroContext* micro_context = GetMicroContext(context); - - // Calculate quantized clip for projection and cell. - const auto* params = - static_cast(node->builtin_data); - const float cell_clip = params->cell_clip; - const float proj_clip = params->proj_clip; - - TfLiteTensor* cell_state = - micro_context->AllocateTempInputTensor(node, kLstmCellStateTensor); - TF_LITE_ENSURE(context, cell_state != nullptr); - TF_LITE_ENSURE(context, cell_state->is_variable); - TfLiteTensor* output_tensor = - micro_context->AllocateTempOutputTensor(node, kLstmOutputTensor); - - TF_LITE_ENSURE(context, - cell_state->quantization.type != kTfLiteNoQuantization); - auto* cell_state_params = - static_cast(cell_state->quantization.params); - TF_LITE_ENSURE(context, - output_tensor->quantization.type != kTfLiteNoQuantization); - auto* proj_params = static_cast( - output_tensor->quantization.params); - if (cell_clip > 0.0f) { - integer_lstm_param->quantized_cell_clip = static_cast(std::min( - std::max(cell_clip / cell_state_params->scale->data[0], -32768.0f), - 32767.0f)); - } else { - integer_lstm_param->quantized_cell_clip = 0; - } - if (proj_clip > 0.0f) { - integer_lstm_param->quantized_proj_clip = static_cast(std::min( - std::max(proj_clip / proj_params->scale->data[0], -128.0f), 127.0f)); - } else { - integer_lstm_param->quantized_proj_clip = 0; +/*Helper Functions*/ + +// Interface to access all the TempTfLiteTensors of the LSTM kernel during the +// preparation phase. Can only be constructed through the constructor to avoid +// memory leakage. All TempTfLiteTensors will be deallocated through the +// destructor. +class LstmTensors { + public: + LstmTensors(const LstmTensors& other) = delete; + LstmTensors& operator=(const LstmTensors& other) = delete; + + LstmTensors(TfLiteContext* context, TfLiteNode* node) { + micro_context_ = GetMicroContext(context); + // 24 internal tensors. see lstm_shared.h for tensor names + for (size_t i = 0; i < 24; i++) { + internal_tensors_[i] = micro_context_->AllocateTempInputTensor(node, i); + } + output_tensor_ = + micro_context_->AllocateTempOutputTensor(node, kLstmOutputTensor); } - // Calculate effective scales. 
- UnidirectionalSequenceLstmOpData* op_data = - static_cast(node->user_data); - const bool use_layer_norm = op_data->use_layer_norm; - - TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kLstmInputTensor); - - TfLiteTensor* input_to_input_weights = micro_context->AllocateTempInputTensor( - node, kLstmInputToInputWeightsTensor); - TfLiteTensor* input_to_forget_weights = - micro_context->AllocateTempInputTensor(node, - kLstmInputToForgetWeightsTensor); - TfLiteTensor* input_to_cell_weights = micro_context->AllocateTempInputTensor( - node, kLstmInputToCellWeightsTensor); - TfLiteTensor* input_to_output_weights = - micro_context->AllocateTempInputTensor(node, - kLstmInputToOutputWeightsTensor); - - TfLiteTensor* recurrent_to_input_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToInputWeightsTensor); - TfLiteTensor* recurrent_to_forget_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToForgetWeightsTensor); - TfLiteTensor* recurrent_to_cell_weights = - micro_context->AllocateTempInputTensor(node, - kLstmRecurrentToCellWeightsTensor); - TfLiteTensor* recurrent_to_output_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToOutputWeightsTensor); - - TfLiteTensor* cell_to_input_weights = micro_context->AllocateTempInputTensor( - node, kLstmCellToInputWeightsTensor); - TfLiteTensor* cell_to_forget_weights = micro_context->AllocateTempInputTensor( - node, kLstmCellToForgetWeightsTensor); - TfLiteTensor* cell_to_output_weights = micro_context->AllocateTempInputTensor( - node, kLstmCellToOutputWeightsTensor); - - TfLiteTensor* input_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmInputLayerNormCoefficientsTensor); - TfLiteTensor* forget_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmForgetLayerNormCoefficientsTensor); - TfLiteTensor* cell_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmCellLayerNormCoefficientsTensor); - TfLiteTensor* output_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmOutputLayerNormCoefficientsTensor); - - TfLiteTensor* projection_weights = micro_context->AllocateTempInputTensor( - node, kLstmProjectionWeightsTensor); - - TfLiteTensor* output_state = - micro_context->AllocateTempInputTensor(node, kLstmOutputStateTensor); - TF_LITE_ENSURE(context, output_state != nullptr); - TF_LITE_ENSURE(context, output_state->is_variable); - - // Since we have already checked that weights are all there or none, we can - // check the existence of only one to get the condition. - const bool use_cifg = (input_to_input_weights == nullptr); - const bool use_peephole = (cell_to_output_weights != nullptr); - const bool use_projection = (projection_weights != nullptr); - - // Get intermediate scales and zero points. 
- float intermediate_scale[5]; - int32_t intermediate_zp[5]; - for (int i = 0; i < 4; ++i) { - if (use_layer_norm) { - TfLiteTensor* intermediate = - micro_context->AllocateTempIntermediateTensor(node, i); - TF_LITE_ENSURE(context, - intermediate->quantization.type != kTfLiteNoQuantization); - auto* params_intermediate = static_cast( - intermediate->quantization.params); - intermediate_scale[i] = params_intermediate->scale->data[0]; - intermediate_zp[i] = params_intermediate->zero_point->data[0]; - if (intermediate != nullptr) { - micro_context->DeallocateTempTfLiteTensor(intermediate); + ~LstmTensors() { + for (size_t i = 0; i < 24; i++) { + if (internal_tensors_[i] != nullptr) { + micro_context_->DeallocateTempTfLiteTensor(internal_tensors_[i]); } - } else { - // Q3.12 for activation functions. - intermediate_scale[i] = std::pow(2.0f, -12.0f); - intermediate_zp[i] = 0; } - } - // In the absence of projection, hidden becomes otuput and this intermediate - // is ignored. - TfLiteTensor* hidden = micro_context->AllocateTempIntermediateTensor(node, 4); - TF_LITE_ENSURE(context, hidden->quantization.type != kTfLiteNoQuantization); - auto* hidden_params = - static_cast(hidden->quantization.params); - intermediate_scale[4] = hidden_params->scale->data[0]; - intermediate_zp[4] = hidden_params->zero_point->data[0]; - if (hidden != nullptr) { - micro_context->DeallocateTempTfLiteTensor(hidden); - } - - // Scales. - const float default_scale = 1.0; - float input_scale = default_scale; - float input_to_input_weight_scale = default_scale; - float recurrent_to_input_weight_scale = default_scale; - float cell_to_input_weight_scale = default_scale; - float input_to_forget_weight_scale = default_scale; - float recurrent_to_forget_weight_scale = default_scale; - float cell_to_forget_weight_scale = default_scale; - float input_to_cell_weight_scale = default_scale; - float recurrent_to_cell_weight_scale = default_scale; - float input_to_output_weight_scale = default_scale; - float recurrent_to_output_weight_scale = default_scale; - float cell_to_output_weight_scale = default_scale; - float projection_weight_scale = default_scale; - float layer_norm_input_scale = default_scale; - float layer_norm_forget_scale = default_scale; - float layer_norm_cell_scale = default_scale; - float layer_norm_output_scale = default_scale; - float output_state_scale = default_scale; - int cell_scale = 1; - - // Effective scales. - float effective_input_to_input_scale = default_scale; - float effective_recurrent_to_input_scale = default_scale; - float effective_cell_to_input_scale = default_scale; - float effective_input_to_forget_scale = default_scale; - float effective_recurrent_to_forget_scale = default_scale; - float effective_cell_to_forget_scale = default_scale; - float effective_input_to_cell_scale = default_scale; - float effective_recurrent_to_cell_scale = default_scale; - float effective_input_to_output_scale = default_scale; - float effective_recurrent_to_output_scale = default_scale; - float effective_cell_to_output_scale = default_scale; - float effective_proj_scale = default_scale; - float effective_hidden_scale = default_scale; - - // Populate scales. 
- if (!use_cifg) { - input_to_input_weight_scale = input_to_input_weights->params.scale; - recurrent_to_input_weight_scale = recurrent_to_input_weights->params.scale; - } - - if (use_peephole) { - if (!use_cifg) { - cell_to_input_weight_scale = cell_to_input_weights->params.scale; + micro_context_->DeallocateTempTfLiteTensor(output_tensor_); + } + + // Verify the LSTM internal tensor properties (e.g., type checks) + // Input/output/states/fc weights tensors are required for kernel evaulation. + // The state tensors should be variables. Variants of the standard LSTM + // are not supported here, therefore their corresponding tensors should be + // invalid + TfLiteStatus ValidateTensorStatus(TfLiteContext* context) const { + // Verify certain tensor properties + // input tensor + TF_LITE_ENSURE(context, internal_tensors_[kLstmInputTensor] != nullptr); + // hidden state + TF_LITE_ENSURE(context, + internal_tensors_[kLstmOutputStateTensor] != nullptr); + TF_LITE_ENSURE(context, + internal_tensors_[kLstmOutputStateTensor]->is_variable); + // hidden state becomes input so they must have the same type + TF_LITE_ENSURE_EQ(context, internal_tensors_[kLstmOutputStateTensor]->type, + internal_tensors_[kLstmInputTensor]->type); + // cell state + TF_LITE_ENSURE(context, internal_tensors_[kLstmCellStateTensor] != nullptr); + TF_LITE_ENSURE(context, + internal_tensors_[kLstmCellStateTensor]->is_variable); + // output + TF_LITE_ENSURE(context, output_tensor_ != nullptr); + // output type is the same as the input type (activations) + TF_LITE_ENSURE_EQ(context, output_tensor_->type, + internal_tensors_[kLstmInputTensor]->type); + + // weight tensors (1-9, see lstm_shared for index definition) + const auto weight_type = + internal_tensors_[kLstmInputToForgetWeightsTensor]->type; + for (size_t i = 1; i < 9; i++) { + TF_LITE_ENSURE(context, internal_tensors_[i] != nullptr); + TF_LITE_ENSURE_EQ(context, internal_tensors_[i]->type, weight_type); } - cell_to_forget_weight_scale = cell_to_forget_weights->params.scale; - cell_to_output_weight_scale = cell_to_output_weights->params.scale; - } - if (use_layer_norm) { - if (!use_cifg) { - layer_norm_input_scale = input_layer_norm_coefficients->params.scale; + // bias tensors (12-15, see lstm_shared for index definition) + const auto bias_type = internal_tensors_[kLstmForgetGateBiasTensor]->type; + for (size_t i = 12; i < 16; i++) { + TF_LITE_ENSURE(context, internal_tensors_[i] != nullptr); + TF_LITE_ENSURE_EQ(context, internal_tensors_[i]->type, bias_type); } - layer_norm_forget_scale = forget_layer_norm_coefficients->params.scale; - layer_norm_cell_scale = cell_layer_norm_coefficients->params.scale; - layer_norm_output_scale = output_layer_norm_coefficients->params.scale; - } - - if (use_projection) { - projection_weight_scale = projection_weights->params.scale; - } - output_state_scale = output_state->params.scale; - - input_to_forget_weight_scale = input_to_forget_weights->params.scale; - input_to_cell_weight_scale = input_to_cell_weights->params.scale; - input_to_output_weight_scale = input_to_output_weights->params.scale; - recurrent_to_forget_weight_scale = recurrent_to_forget_weights->params.scale; - recurrent_to_cell_weight_scale = recurrent_to_cell_weights->params.scale; - recurrent_to_output_weight_scale = recurrent_to_output_weights->params.scale; - - // Check cell state (already used above) - TF_LITE_ENSURE(context, CheckedLog2(cell_state->params.scale, &cell_scale)); - // TF_LITE_ENSURE(context, cell_scale <= -9); - integer_lstm_param->cell_scale = 
cell_scale; - input_scale = input->params.scale; - - // Calculate effective scales. - if (!use_cifg) { - effective_input_to_input_scale = - input_to_input_weight_scale * input_scale / intermediate_scale[0]; - effective_recurrent_to_input_scale = recurrent_to_input_weight_scale * - output_state_scale / - intermediate_scale[0]; - } - effective_input_to_forget_scale = - input_to_forget_weight_scale * input_scale / intermediate_scale[1]; - effective_recurrent_to_forget_scale = recurrent_to_forget_weight_scale * - output_state_scale / - intermediate_scale[1]; - - effective_input_to_cell_scale = - input_to_cell_weight_scale * input_scale / intermediate_scale[2]; - effective_recurrent_to_cell_scale = recurrent_to_cell_weight_scale * - output_state_scale / - intermediate_scale[2]; - - effective_input_to_output_scale = - input_to_output_weight_scale * input_scale / intermediate_scale[3]; - effective_recurrent_to_output_scale = recurrent_to_output_weight_scale * - output_state_scale / - intermediate_scale[3]; - - effective_hidden_scale = - std::pow(2.0f, -15.0f) / intermediate_scale[4] * std::pow(2.0f, -15.0f); - - effective_proj_scale = - projection_weight_scale * intermediate_scale[4] / output_state_scale; - - if (use_peephole) { - if (!use_cifg) { - effective_cell_to_input_scale = - std::pow(2.0f, static_cast<float>(cell_scale)) * - cell_to_input_weight_scale / intermediate_scale[0]; + // Tensors from LSTM variants are invalid + // No peephole + for (size_t i = 9; i < 12; i++) { + TF_LITE_ENSURE(context, internal_tensors_[i] == nullptr); + } + // No projection + for (size_t i = 16; i < 18; i++) { + TF_LITE_ENSURE(context, internal_tensors_[i] == nullptr); } - effective_cell_to_forget_scale = - std::pow(2.0f, static_cast<float>(cell_scale)) * - cell_to_forget_weight_scale / intermediate_scale[1]; - effective_cell_to_output_scale = - std::pow(2.0f, static_cast<float>(cell_scale)) * - cell_to_output_weight_scale / intermediate_scale[3]; + // No internal layer norm + for (size_t i = 20; i < 24; i++) { + TF_LITE_ENSURE(context, internal_tensors_[i] == nullptr); + } + return kTfLiteOk; } - // Decompose scales.
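The "decompose" step below relies on QuantizeMultiplier (from tensorflow/lite/kernels/internal/quantization_util.h), which expresses a positive real multiplier as a Q31 integer multiplier plus a base-2 exponent, so the kernel can requantize with an integer multiply and shift: real ~= (a / 2^31) * 2^b. A self-contained sketch of the same decomposition using std::frexp; this is illustrative, not the library implementation, which also handles saturation and edge cases:

#include <cmath>
#include <cstdint>

// Decompose multiplier so that multiplier ~= (q_fixed / 2^31) * 2^shift.
void DecomposeScale(double multiplier, int32_t* q_fixed_out, int* shift) {
  if (multiplier == 0.0) {
    *q_fixed_out = 0;
    *shift = 0;
    return;
  }
  const double q = std::frexp(multiplier, shift);  // q in [0.5, 1)
  int64_t q_fixed = static_cast<int64_t>(std::llround(q * (1LL << 31)));
  if (q_fixed == (1LL << 31)) {  // rounding pushed q up to exactly 1.0
    q_fixed /= 2;
    ++(*shift);
  }
  *q_fixed_out = static_cast<int32_t>(q_fixed);
}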
- int shift_output; - QuantizeMultiplier(static_cast<double>(effective_input_to_input_scale), - &integer_lstm_param->effective_input_to_input_scale_a, - &shift_output); - integer_lstm_param->effective_input_to_input_scale_b = - static_cast<int32_t>(shift_output); - QuantizeMultiplier(static_cast<double>(effective_recurrent_to_input_scale), - &integer_lstm_param->effective_recurrent_to_input_scale_a, - &shift_output); - integer_lstm_param->effective_recurrent_to_input_scale_b = - static_cast<int32_t>(shift_output); - QuantizeMultiplier(static_cast<double>(effective_cell_to_input_scale), - &integer_lstm_param->effective_cell_to_input_scale_a, - &shift_output); - integer_lstm_param->effective_cell_to_input_scale_b = - static_cast<int32_t>(shift_output); - QuantizeMultiplier(static_cast<double>(effective_input_to_forget_scale), - &integer_lstm_param->effective_input_to_forget_scale_a, - &shift_output); - integer_lstm_param->effective_input_to_forget_scale_b = - static_cast<int32_t>(shift_output); - QuantizeMultiplier(static_cast<double>(effective_recurrent_to_forget_scale), - &integer_lstm_param->effective_recurrent_to_forget_scale_a, - &shift_output); - integer_lstm_param->effective_recurrent_to_forget_scale_b = - static_cast<int32_t>(shift_output); - QuantizeMultiplier(static_cast<double>(effective_cell_to_forget_scale), - &integer_lstm_param->effective_cell_to_forget_scale_a, - &shift_output); - integer_lstm_param->effective_cell_to_forget_scale_b = - static_cast<int32_t>(shift_output); - QuantizeMultiplier(static_cast<double>(effective_input_to_cell_scale), - &integer_lstm_param->effective_input_to_cell_scale_a, - &shift_output); - integer_lstm_param->effective_input_to_cell_scale_b = - static_cast<int32_t>(shift_output); - QuantizeMultiplier(static_cast<double>(effective_recurrent_to_cell_scale), - &integer_lstm_param->effective_recurrent_to_cell_scale_a, - &shift_output); - integer_lstm_param->effective_recurrent_to_cell_scale_b = - static_cast<int32_t>(shift_output); - QuantizeMultiplier(static_cast<double>(effective_input_to_output_scale), - &integer_lstm_param->effective_input_to_output_scale_a, - &shift_output); - integer_lstm_param->effective_input_to_output_scale_b = - static_cast<int32_t>(shift_output); - QuantizeMultiplier(static_cast<double>(effective_recurrent_to_output_scale), - &integer_lstm_param->effective_recurrent_to_output_scale_a, - &shift_output); - integer_lstm_param->effective_recurrent_to_output_scale_b = - static_cast<int32_t>(shift_output); - QuantizeMultiplier(static_cast<double>(effective_cell_to_output_scale), - &integer_lstm_param->effective_cell_to_output_scale_a, - &shift_output); - integer_lstm_param->effective_cell_to_output_scale_b = - static_cast<int32_t>(shift_output); - QuantizeMultiplier(static_cast<double>(effective_proj_scale), - &integer_lstm_param->effective_proj_scale_a, - &shift_output); - integer_lstm_param->effective_proj_scale_b = - static_cast<int32_t>(shift_output); - QuantizeMultiplier(static_cast<double>(effective_hidden_scale), - &integer_lstm_param->effective_hidden_scale_a, - &shift_output); - integer_lstm_param->effective_hidden_scale_b = - static_cast<int32_t>(shift_output); - QuantizeMultiplier(static_cast<double>(layer_norm_input_scale), - &integer_lstm_param->layer_norm_input_scale_a, - &shift_output); - integer_lstm_param->layer_norm_input_scale_b = - static_cast<int32_t>(shift_output); - QuantizeMultiplier(static_cast<double>(layer_norm_forget_scale), - &integer_lstm_param->layer_norm_forget_scale_a, - &shift_output); - integer_lstm_param->layer_norm_forget_scale_b = - static_cast<int32_t>(shift_output); - QuantizeMultiplier(static_cast<double>(layer_norm_cell_scale), - &integer_lstm_param->layer_norm_cell_scale_a, - &shift_output); - integer_lstm_param->layer_norm_cell_scale_b = - static_cast<int32_t>(shift_output); - QuantizeMultiplier(static_cast<double>(layer_norm_output_scale), - &integer_lstm_param->layer_norm_output_scale_a, - &shift_output); - integer_lstm_param->layer_norm_output_scale_b = - static_cast<int32_t>(shift_output); - - integer_lstm_param->hidden_zp = intermediate_zp[4]; - - // 10000 is used to make sure the kernel logic does not overflow. - if (!use_cifg) { - integer_lstm_param->input_variance_guard = - std::max(1, static_cast<int32_t>(10000 * layer_norm_input_scale)); + // Internal tensors; see lstm_shared.h for tensor names + const TfLiteTensor* GetInternalTensor(const int tensor_index) const { + return internal_tensors_[tensor_index]; } - integer_lstm_param->forget_variance_guard = - std::max(1, static_cast<int32_t>(10000 * layer_norm_forget_scale)); - integer_lstm_param->cell_variance_guard = - std::max(1, static_cast<int32_t>(10000 * layer_norm_cell_scale)); - integer_lstm_param->output_variance_guard = - std::max(1, static_cast<int32_t>(10000 * layer_norm_output_scale)); - if (cell_state != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_state); - } - if (output_tensor != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output_tensor); - } - if (input != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input); + const TfLiteTensor* HiddenStateTensor() const { + return internal_tensors_[kLstmOutputStateTensor]; } - if (input_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_input_weights); + const TfLiteTensor* CellStateTensor() const { + return internal_tensors_[kLstmCellStateTensor]; } - if (input_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_forget_weights); - } - if (input_to_cell_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_cell_weights); - } - if (input_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_output_weights); - } - if (recurrent_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_input_weights); - } - if (recurrent_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_forget_weights); - } - if (recurrent_to_cell_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_cell_weights); - } - if (recurrent_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_output_weights); - } - if (cell_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_to_input_weights); - } - if (cell_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_to_forget_weights); - } - if (cell_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_to_output_weights); - } - if (input_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_layer_norm_coefficients); - } - if (forget_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(forget_layer_norm_coefficients); - } - if (cell_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_layer_norm_coefficients); - } - if (output_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output_layer_norm_coefficients); - } - if (projection_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(projection_weights); - } - if (output_state != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output_state); - } - - return kTfLiteOk; -} + const TfLiteTensor* OutputTensor() const { return output_tensor_; } -// Temporary buffers used for hybrid mode -enum HybridTempBuffer { - kPrimaryScratchBuffer = 0, - kInputQuantized = 1, - kOutputStateQuantized = 2, - kCellStateQuantized = 3, - kInputScalingFactors = 4, - kOutputStateScalingFactors = 5, - kProductScalingFactors = 6, - kRecoveredCellWeights = 7, - kAccumScratch = 8, - kInputZeroPoints = 9, - kOutputStateZeroPoints = 10, - kScales = 11, - kNumHybridTempBuffers = 12, + private: + // see lstm_shared.h for tensor names + MicroContext* micro_context_; + TfLiteTensor* internal_tensors_[24]; + TfLiteTensor* output_tensor_; }; -void* UnidirectionalSequenceLstmInit(TfLiteContext* context, const char* buffer, - size_t length) { - TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - return context->AllocatePersistentBuffer( - context, sizeof(UnidirectionalSequenceLstmOpData)); +// Deduce the size information (Batch (B), Time Steps (T), Input dimension (I), +// State dimension (S)) that defines the LSTM using the input and hidden state +// tensors +LstmSizeInfo CreateLstmSizeInfo( + const bool time_major, const TfLiteIntArray* input_tensor_shape, + const TfLiteIntArray* hidden_state_tensor_shape) { + LstmSizeInfo size_info; + size_info.time_major = time_major; + size_info.batch_size = + time_major ? input_tensor_shape->data[1] : input_tensor_shape->data[0]; + size_info.time_steps = + time_major ? input_tensor_shape->data[0] : input_tensor_shape->data[1]; + size_info.input_dimension = input_tensor_shape->data[2]; + size_info.state_dimension = hidden_state_tensor_shape->data[1]; + return size_info; } -// Check that input tensor dimensions matches with each other. -TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context, - TfLiteNode* node, int n_input, - int n_output, int n_cell, - bool use_layer_norm, bool is_integer) { - MicroContext* micro_context = GetMicroContext(context); - - const auto* params = reinterpret_cast<TfLiteUnidirectionalSequenceLSTMParams*>( - node->builtin_data); - - // Making sure clipping parameters have valid values.
- // == 0 means no clipping - // > 0 means clipping - TF_LITE_ENSURE(context, params->cell_clip >= 0); - TF_LITE_ENSURE(context, params->proj_clip >= 0); - - TfLiteTensor* input_to_input_weights = micro_context->AllocateTempInputTensor( - node, kLstmInputToInputWeightsTensor); - if (input_to_input_weights != nullptr) { - TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->data[0], n_cell); - TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->data[1], n_input); - } - - TfLiteTensor* input_to_forget_weights = - micro_context->AllocateTempInputTensor(node, - kLstmInputToForgetWeightsTensor); - TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->data[0], n_cell); - TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->data[1], n_input); - - TfLiteTensor* input_to_cell_weights = micro_context->AllocateTempInputTensor( - node, kLstmInputToCellWeightsTensor); - TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->data[0], n_cell); - TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->data[1], n_input); - - TfLiteTensor* recurrent_to_input_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToInputWeightsTensor); - if (recurrent_to_input_weights != nullptr) { - TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->dims->data[0], - n_cell); - TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->dims->data[1], - n_output); - } - - TfLiteTensor* recurrent_to_forget_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToForgetWeightsTensor); - TF_LITE_ENSURE_EQ(context, recurrent_to_forget_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, recurrent_to_forget_weights->dims->data[0], - n_cell); - TF_LITE_ENSURE_EQ(context, recurrent_to_forget_weights->dims->data[1], - n_output); - - TfLiteTensor* recurrent_to_cell_weights = - micro_context->AllocateTempInputTensor(node, - kLstmRecurrentToCellWeightsTensor); - TF_LITE_ENSURE_EQ(context, recurrent_to_cell_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, recurrent_to_cell_weights->dims->data[0], n_cell); - TF_LITE_ENSURE_EQ(context, recurrent_to_cell_weights->dims->data[1], - n_output); - - // We make sure the input-gate's parameters are either both present (regular - // LSTM) or not at all (CIFG-LSTM). - const bool cifg_weights_all_or_none = - ((input_to_input_weights != nullptr) && - (recurrent_to_input_weights != nullptr)) || - ((input_to_input_weights == nullptr) && - (recurrent_to_input_weights == nullptr)); - TF_LITE_ENSURE(context, cifg_weights_all_or_none == true); - - TfLiteTensor* cell_to_input_weights = micro_context->AllocateTempInputTensor( - node, kLstmCellToInputWeightsTensor); - if (cell_to_input_weights != nullptr) { - TF_LITE_ENSURE_EQ(context, cell_to_input_weights->dims->size, 1); - TF_LITE_ENSURE_EQ(context, cell_to_input_weights->dims->data[0], n_cell); - TF_LITE_ENSURE_TYPES_EQ( - context, cell_to_input_weights->type, - is_integer ? 
kTfLiteInt16 : input_to_forget_weights->type); - } - - TfLiteTensor* cell_to_forget_weights = micro_context->AllocateTempInputTensor( - node, kLstmCellToForgetWeightsTensor); - if (cell_to_forget_weights != nullptr) { - TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->dims->size, 1); - TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->dims->data[0], n_cell); - TF_LITE_ENSURE_TYPES_EQ( - context, cell_to_forget_weights->type, - is_integer ? kTfLiteInt16 : input_to_forget_weights->type); - } - - TfLiteTensor* cell_to_output_weights = micro_context->AllocateTempInputTensor( - node, kLstmCellToOutputWeightsTensor); - if (cell_to_output_weights != nullptr) { - TF_LITE_ENSURE_EQ(context, cell_to_output_weights->dims->size, 1); - TF_LITE_ENSURE_EQ(context, cell_to_output_weights->dims->data[0], n_cell); - TF_LITE_ENSURE_TYPES_EQ( - context, cell_to_output_weights->type, - is_integer ? kTfLiteInt16 : input_to_forget_weights->type); - } - - // Making sure the peephole weights are there all or none. - const bool use_cifg = (input_to_input_weights == nullptr); - const bool peephole_weights_all_or_none = - ((cell_to_input_weights != nullptr || use_cifg) && - (cell_to_forget_weights != nullptr) && - (cell_to_output_weights != nullptr)) || - ((cell_to_input_weights == nullptr) && - (cell_to_forget_weights == nullptr) && - (cell_to_output_weights == nullptr)); - TF_LITE_ENSURE(context, peephole_weights_all_or_none == true); - - // Make sure the input gate bias is present only when not a CIFG-LSTM. - TfLiteTensor* input_gate_bias = - micro_context->AllocateTempInputTensor(node, kLstmInputGateBiasTensor); - if (use_cifg) { - TF_LITE_ENSURE_EQ(context, input_gate_bias, nullptr); - } else { - TF_LITE_ENSURE_EQ(context, input_gate_bias->dims->size, 1); - TF_LITE_ENSURE_EQ(context, input_gate_bias->dims->data[0], n_cell); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, input_gate_bias->type, kTfLiteInt32); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, input_gate_bias->type, kTfLiteFloat32); - } - } - - TfLiteTensor* forget_gate_bias = - micro_context->AllocateTempInputTensor(node, kLstmForgetGateBiasTensor); - TF_LITE_ENSURE_EQ(context, forget_gate_bias->dims->size, 1); - TF_LITE_ENSURE_EQ(context, forget_gate_bias->dims->data[0], n_cell); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, forget_gate_bias->type, kTfLiteInt32); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, forget_gate_bias->type, kTfLiteFloat32); - } - - TfLiteTensor* cell_gate_bias = - micro_context->AllocateTempInputTensor(node, kLstmCellGateBiasTensor); - TF_LITE_ENSURE_EQ(context, cell_gate_bias->dims->size, 1); - TF_LITE_ENSURE_EQ(context, cell_gate_bias->dims->data[0], n_cell); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, cell_gate_bias->type, kTfLiteInt32); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, cell_gate_bias->type, kTfLiteFloat32); - } - - TfLiteTensor* output_gate_bias = - micro_context->AllocateTempInputTensor(node, kLstmOutputGateBiasTensor); - TF_LITE_ENSURE_EQ(context, output_gate_bias->dims->size, 1); - TF_LITE_ENSURE_EQ(context, output_gate_bias->dims->data[0], n_cell); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, output_gate_bias->type, kTfLiteInt32); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, output_gate_bias->type, kTfLiteFloat32); - } - - TfLiteTensor* projection_weights = micro_context->AllocateTempInputTensor( - node, kLstmProjectionWeightsTensor); - if (projection_weights != nullptr) { - TF_LITE_ENSURE_EQ(context, projection_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, 
projection_weights->dims->data[0], n_output); - TF_LITE_ENSURE_EQ(context, projection_weights->dims->data[1], n_cell); - } - - TfLiteTensor* projection_bias = - micro_context->AllocateTempInputTensor(node, kLstmProjectionBiasTensor); - if (projection_bias != nullptr) { - TF_LITE_ENSURE_EQ(context, projection_bias->dims->size, 1); - TF_LITE_ENSURE_EQ(context, projection_bias->dims->data[0], n_output); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, projection_bias->type, kTfLiteInt32); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, projection_bias->type, kTfLiteFloat32); - } - } - - // Making sure the projection tensors are consistent: - // 1) If projection weight is not present, then projection bias should not be - // present. - // 2) If projection weight is present, then projection bias is optional. - const bool projecton_tensors_consistent = - ((projection_weights != nullptr) || (projection_bias == nullptr)); - TF_LITE_ENSURE(context, projecton_tensors_consistent == true); - - if (use_layer_norm) { - TfLiteTensor* input_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmInputLayerNormCoefficientsTensor); - if (use_cifg) { - TF_LITE_ENSURE_EQ(context, input_layer_norm_coefficients, nullptr); - } else { - TF_LITE_ENSURE(context, input_layer_norm_coefficients != nullptr); - TF_LITE_ENSURE_EQ(context, input_layer_norm_coefficients->dims->size, 1); - TF_LITE_ENSURE_EQ(context, input_layer_norm_coefficients->dims->data[0], - n_cell); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, input_layer_norm_coefficients->type, - kTfLiteInt16); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, input_layer_norm_coefficients->type, - kTfLiteFloat32); - } - } - - TfLiteTensor* forget_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmForgetLayerNormCoefficientsTensor); - TF_LITE_ENSURE_EQ(context, forget_layer_norm_coefficients->dims->size, 1); - TF_LITE_ENSURE_EQ(context, forget_layer_norm_coefficients->dims->data[0], - n_cell); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, forget_layer_norm_coefficients->type, - kTfLiteInt16); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, forget_layer_norm_coefficients->type, - kTfLiteFloat32); - } - - TfLiteTensor* cell_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmCellLayerNormCoefficientsTensor); - TF_LITE_ENSURE_EQ(context, cell_layer_norm_coefficients->dims->size, 1); - TF_LITE_ENSURE_EQ(context, cell_layer_norm_coefficients->dims->data[0], - n_cell); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, cell_layer_norm_coefficients->type, - kTfLiteInt16); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, cell_layer_norm_coefficients->type, - kTfLiteFloat32); - } - - TfLiteTensor* output_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmOutputLayerNormCoefficientsTensor); - TF_LITE_ENSURE_EQ(context, output_layer_norm_coefficients->dims->size, 1); - TF_LITE_ENSURE_EQ(context, output_layer_norm_coefficients->dims->data[0], - n_cell); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, output_layer_norm_coefficients->type, - kTfLiteInt16); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, output_layer_norm_coefficients->type, - kTfLiteFloat32); - } - if (input_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_layer_norm_coefficients); - } - if (forget_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(forget_layer_norm_coefficients); - } - if 
(cell_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_layer_norm_coefficients); - } - if (output_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output_layer_norm_coefficients); - } - } - - if (input_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_input_weights); - } - if (input_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_forget_weights); - } - if (input_to_cell_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_cell_weights); - } - if (recurrent_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_input_weights); - } - if (recurrent_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_forget_weights); - } - micro_context->DeallocateTempTfLiteTensor(recurrent_to_cell_weights); - if (cell_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_to_input_weights); - } - if (cell_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_to_forget_weights); - } - if (cell_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_to_output_weights); - } - if (input_gate_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_gate_bias); - } - if (forget_gate_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(forget_gate_bias); - } - if (cell_gate_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_gate_bias); - } - if (output_gate_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output_gate_bias); - } - if (projection_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(projection_weights); - } - if (projection_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(projection_bias); - } +TfLiteStatus ValidateWeightTensorSize(TfLiteContext* context, + const TfLiteTensor* tensor, int dim1_size, + int dim2_size) { + TF_LITE_ENSURE_EQ(context, tensor->dims->size, 2); + TF_LITE_ENSURE_EQ(context, tensor->dims->data[0], dim1_size); + TF_LITE_ENSURE_EQ(context, tensor->dims->data[1], dim2_size); + return kTfLiteOk; +} +TfLiteStatus ValidateBiasTensorSize(TfLiteContext* context, + const TfLiteTensor* tensor, int size) { + TF_LITE_ENSURE_EQ(context, tensor->dims->size, 1); + TF_LITE_ENSURE_EQ(context, tensor->dims->data[0], size); return kTfLiteOk; } -TfLiteStatus PrecomputeZeroPointTimesWeightWithBias( - TfLiteContext* context, int32_t zero_point, - const TfLiteTensor* weight_tensor, const TfLiteTensor* bias_tensor, - int32_t** output) { - if (weight_tensor == nullptr) { - return kTfLiteOk; +// Go through every tensor and make sure its shape matches the kernel +// configuration. +TfLiteStatus ValidateTensorSize(TfLiteContext* context, + const LstmTensors& tensors, + const LstmSizeInfo& size_info) { + // Input FC weights + for (size_t i = 1; i < 5; i++) { + TF_LITE_ENSURE_OK( + context, ValidateWeightTensorSize(context, tensors.GetInternalTensor(i), + size_info.state_dimension, + size_info.input_dimension)); + } + // Recurrent FC weights + for (size_t i = 5; i < 9; i++) { + TF_LITE_ENSURE_OK( + context, ValidateWeightTensorSize(context, tensors.GetInternalTensor(i), + size_info.state_dimension, + size_info.state_dimension)); + } + // Biases + for (size_t i = 12; i < 16; i++) { + TF_LITE_ENSURE_OK( + context, ValidateBiasTensorSize(context, tensors.GetInternalTensor(i), + size_info.state_dimension)); } - const RuntimeShape& weight_shape = GetTensorShape(weight_tensor); - TF_LITE_ENSURE_EQ(context, weight_shape.DimensionsCount(), 2); - const int row = weight_shape.Dims(0); - const int col = weight_shape.Dims(1); - TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - *output = static_cast<int32_t*>( - context->AllocatePersistentBuffer(context, row * sizeof(int32_t))); + // Check the shape of input state tensors. + // These tensors may be 1D or 2D. It's fine as long as the total size is + // correct. + TF_LITE_ENSURE_EQ(context, NumElements(tensors.HiddenStateTensor()), + size_info.batch_size * size_info.state_dimension); + TF_LITE_ENSURE_EQ(context, NumElements(tensors.CellStateTensor()), + size_info.batch_size * size_info.state_dimension); - if (bias_tensor == nullptr) { - memset(*output, 0, row * sizeof(int32_t)); - } else { - const int32_t* bias = GetTensorData<int32_t>(bias_tensor); - memcpy(*output, bias, row * sizeof(int32_t)); - } - if (zero_point != 0) { - const int8_t* weight = GetTensorData<int8_t>(weight_tensor); - tflite::tensor_utils::MatrixScalarMultiplyAccumulate(weight, zero_point, - row, col, *output); - } + // Check the shape of the output tensor against that of the input tensor + TF_LITE_ENSURE_EQ(context, tensors.OutputTensor()->dims->size, 3); + TF_LITE_ENSURE_EQ(context, + tensors.GetInternalTensor(kLstmInputTensor)->dims->data[0], + tensors.OutputTensor()->dims->data[0]); + TF_LITE_ENSURE_EQ(context, + tensors.GetInternalTensor(kLstmInputTensor)->dims->data[1], + tensors.OutputTensor()->dims->data[1]); + TF_LITE_ENSURE_EQ(context, tensors.OutputTensor()->dims->data[2], + size_info.state_dimension); return kTfLiteOk; } -TfLiteStatus PopulatePrecomputedZPTimesWeightsWithBias( - TfLiteContext* context, UnidirectionalSequenceLstmOpData* op_data, - TfLiteNode* node) { - MicroContext* micro_context = GetMicroContext(context); - - TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kLstmInputTensor); - TfLiteTensor* output_state = - micro_context->AllocateTempInputTensor(node, kLstmOutputStateTensor); - TF_LITE_ENSURE(context, output_state != nullptr); - TF_LITE_ENSURE(context, output_state->is_variable); - - const int32_t input_zero_point = -input->params.zero_point; - const int32_t output_state_zero_point = -output_state->params.zero_point; - - TfLiteTensor* input_to_input_weights = micro_context->AllocateTempInputTensor( - node, kLstmInputToInputWeightsTensor); - TfLiteTensor* input_to_forget_weights = - micro_context->AllocateTempInputTensor(node, - kLstmInputToForgetWeightsTensor); - TfLiteTensor* input_to_cell_weights = micro_context->AllocateTempInputTensor( - node, kLstmInputToCellWeightsTensor); - TfLiteTensor* input_to_output_weights = - micro_context->AllocateTempInputTensor(node, - kLstmInputToOutputWeightsTensor); +// Wrapper function to create gate parameters for the four internal LSTM gates +TfLiteStatus CreateGateParams( + TfLiteContext* context, + /*Input tensors*/ + const TfLiteTensor* input, const TfLiteTensor* input_weight, + const TfLiteTensor* input_bias, + /*Hidden state tensors*/ + const TfLiteTensor* hidden_state, const TfLiteTensor* hidden_state_weight, + const TfLiteTensor* hidden_state_bias, + /*Scale of the fc output (input to non-linear activation)*/ + const float nonlinear_activation_input_scale, const TfLiteType cell_type, + tflite::GateParameters& gate_params) { + // A temp TfLite tensor to represent the output of the fc operation. Only the + // data type and quantization parameters are set, since it is only used for + // parameter calculations. + TfLiteTensor fc_output_temp; + fc_output_temp.type = cell_type; + fc_output_temp.params.scale = nonlinear_activation_input_scale; + fc_output_temp.params.zero_point = 0; // symmetrically quantized + + // A temp fc opdata to reuse the helper function for creating fc parameters + tflite::OpDataFullyConnected fc_data_temp; + // TODO(b/265853320): due to the lack of precision for the float scale, + // scale_diff / output_scale <= 0.02 (potentially requires 1e-8 precision) + // cannot be satisfied for the bias. Here we rely on the correctness of the + // conversion process (set input_bias=nullptr to avoid checking) for + // tensor scales. + TF_LITE_ENSURE_STATUS(CalculateOpDataFullyConnected( + context, kTfLiteActNone, input->type, input, input_weight, + /*input_bias=*/nullptr, &fc_output_temp, &fc_data_temp)); + gate_params.input_fc_params = FullyConnectedParamsQuantized(fc_data_temp); + double real_multiplier = 0.0; + GetQuantizedConvolutionMultipler(context, input, input_weight, nullptr, + &fc_output_temp, &real_multiplier); + + TF_LITE_ENSURE_STATUS(CalculateOpDataFullyConnected( + context, kTfLiteActNone, hidden_state->type, hidden_state, + hidden_state_weight, hidden_state_bias, &fc_output_temp, &fc_data_temp)); + gate_params.recurrent_fc_params = FullyConnectedParamsQuantized(fc_data_temp); + return kTfLiteOk; +} - TfLiteTensor* recurrent_to_input_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToInputWeightsTensor); - TfLiteTensor* recurrent_to_forget_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToForgetWeightsTensor); - TfLiteTensor* recurrent_to_cell_weights = - micro_context->AllocateTempInputTensor(node, - kLstmRecurrentToCellWeightsTensor); - TfLiteTensor* recurrent_to_output_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToOutputWeightsTensor); +// Create parameters for the element-wise multiplications that happen in a) the +// cell state update; b) the hidden state update. +// Note that all gate outputs are symmetrically quantized, so only scales are +// required for the inputs. However, during the hidden state update phase, the +// output is the updated hidden state, which is asymmetrically quantized. Thus +// the output may require a zero point. +tflite::ArithmeticParams CreateInterGateMulParams(const float input1_scale, + const float input2_scale, + const float output_scale, + const TfLiteType output_type, + const int output_zp = 0) { + tflite::ArithmeticParams op_params = {}; + if (output_type == kTfLiteInt16) { + op_params.quantized_activation_min = std::numeric_limits<int16_t>::min(); + op_params.quantized_activation_max = std::numeric_limits<int16_t>::max(); + } else if (output_type == kTfLiteInt8) { + op_params.quantized_activation_min = std::numeric_limits<int8_t>::min(); + op_params.quantized_activation_max = std::numeric_limits<int8_t>::max(); + } + + op_params.input1_offset = 0; // symmetric + op_params.input2_offset = 0; // symmetric + op_params.output_offset = output_zp; + + const double input_product_scale = + static_cast<double>(input1_scale) * static_cast<double>(input2_scale); + double effective_scale = + input_product_scale / static_cast<double>(output_scale); + + QuantizeMultiplier(effective_scale, &op_params.output_multiplier, + &op_params.output_shift); + return op_params; +} - TfLiteTensor* projection_weights = micro_context->AllocateTempInputTensor( - node, kLstmProjectionWeightsTensor); - TfLiteTensor* projection_bias = - micro_context->AllocateTempInputTensor(node, kLstmProjectionBiasTensor); +// Create the additional information about the cell state, which includes: +// cell_state_scale_power: used in integer nonlinear function (e.g., tanh) +// quantized_cell_clip: quantized cell clip range +CellStateInfo CreateLstmCellStateInfo(const float cell_state_scale, + const float cell_clip) { + CellStateInfo cell_state_info; + // cell_state_scale_power: 2^-cell_state_scale_power = cell state scale + int buffer; + tflite::CheckedLog2(cell_state_scale, &buffer); + cell_state_info.cell_state_scale_power = buffer; + // Cell state specifics + cell_state_info.cell_clip = cell_clip; + cell_state_info.quantized_cell_clip = static_cast<int16_t>( + std::min(std::max(static_cast<double>(cell_clip) / + static_cast<double>(cell_state_scale), + -32768.0), + 32767.0)); + return cell_state_info; +} - IntegerLstmParameter* integer_lstm_params = &op_data->integer_lstm_param; +CellStateInfo CreateLstmCellStateInfoFloat(const float cell_clip) { + CellStateInfo cell_state_info; + cell_state_info.cell_clip = cell_clip; + cell_state_info.cell_state_scale_power = 0; // no quantization + cell_state_info.quantized_cell_clip = 0; // no quantization + return cell_state_info; +} - TfLiteTensor* intermediate = - micro_context->AllocateTempIntermediateTensor(node, 4); - TF_LITE_ENSURE(context, - intermediate->quantization.type != kTfLiteNoQuantization); - const auto* params = - static_cast<TfLiteAffineQuantization*>(intermediate->quantization.params); - const int32_t hidden_zp = params->zero_point->data[0]; +tflite::FullyConnectedParams CreateFCParamsFloat() { + FullyConnectedParams op_params; + CalculateActivationRange(kTfLiteActNone, &op_params.float_activation_min, + &op_params.float_activation_max); + return op_params; +} - // Get bias and perform zero point calculation. - // When there is layer normalization, the gate bias does not apply to matmul - // directly: - // y = ln(w * x + w * r + w * c) + b. - const bool is_layer_norm = op_data->use_layer_norm; +tflite::GateParameters CreateGateParamsFloat() { + tflite::GateParameters gate_params = {}; + gate_params.input_fc_params = CreateFCParamsFloat(); + gate_params.recurrent_fc_params = CreateFCParamsFloat(); + return gate_params; +} - // Forget gate. - TfLiteTensor* forget_gate_bias = is_layer_norm - ?
nullptr - : micro_context->AllocateTempInputTensor( - node, kLstmForgetGateBiasTensor); - TF_LITE_ENSURE_OK( - context, - PrecomputeZeroPointTimesWeightWithBias( - context, input_zero_point, input_to_forget_weights, forget_gate_bias, - &(integer_lstm_params->input_to_forget_effective_bias))); +tflite::ArithmeticParams CreateInterGateMulParamsFloat() { + tflite::ArithmeticParams op_params = {}; + CalculateActivationRange(kTfLiteActNone, &op_params.float_activation_min, + &op_params.float_activation_max); + return op_params; +} - TF_LITE_ENSURE_OK( - context, - PrecomputeZeroPointTimesWeightWithBias( - context, output_state_zero_point, recurrent_to_forget_weights, - nullptr, &(integer_lstm_params->recurrent_to_forget_effective_bias))); +TfLiteStatus PrepareGateParametersFloat(TfLiteContext* context, + const LstmTensors& lstm_tensors, + OpDataLSTM* op_data) { + // Gate Parameters + op_data->forget_gate_parameters = CreateGateParamsFloat(); + op_data->input_gate_parameters = CreateGateParamsFloat(); + op_data->cell_gate_parameters = CreateGateParamsFloat(); + op_data->output_gate_parameters = CreateGateParamsFloat(); + // Inter gate multiplication parameters + op_data->inter_gate_parameters.forget_cell_mul_params = + CreateInterGateMulParamsFloat(); + op_data->inter_gate_parameters.input_mul_params = + CreateInterGateMulParamsFloat(); + op_data->inter_gate_parameters.output_mul_params = + CreateInterGateMulParamsFloat(); + return kTfLiteOk; +} - // Modulation gate. - TfLiteTensor* cell_gate_bias = is_layer_norm - ? nullptr - : micro_context->AllocateTempInputTensor( - node, kLstmCellGateBiasTensor); - TF_LITE_ENSURE_OK( - context, - PrecomputeZeroPointTimesWeightWithBias( - context, input_zero_point, input_to_cell_weights, cell_gate_bias, - &(integer_lstm_params->input_to_cell_effective_bias))); +TfLiteStatus PrepareGateParametersInteger(TfLiteContext* context, + const LstmTensors& lstm_tensors, + OpDataLSTM* op_data) { + float nonlinear_input_scale = 0.00024414062; // 2^-12 Q3.12 -> Q0.15 TF_LITE_ENSURE_OK( context, - PrecomputeZeroPointTimesWeightWithBias( - context, output_state_zero_point, recurrent_to_cell_weights, nullptr, - &(integer_lstm_params->recurrent_to_cell_effective_bias))); - - // Output gate. - TfLiteTensor* output_gate_bias = is_layer_norm - ? 
nullptr - : micro_context->AllocateTempInputTensor( - node, kLstmOutputGateBiasTensor); + CreateGateParams( + context, lstm_tensors.GetInternalTensor(kLstmInputTensor), + lstm_tensors.GetInternalTensor(kLstmInputToForgetWeightsTensor), + lstm_tensors.GetInternalTensor(kLstmForgetGateBiasTensor), + lstm_tensors.GetInternalTensor(kLstmOutputStateTensor), + lstm_tensors.GetInternalTensor(kLstmRecurrentToForgetWeightsTensor), + /*hidden_state_bias=*/nullptr, nonlinear_input_scale, kTfLiteInt16, + op_data->forget_gate_parameters)); TF_LITE_ENSURE_OK( context, - PrecomputeZeroPointTimesWeightWithBias( - context, input_zero_point, input_to_output_weights, output_gate_bias, - &(integer_lstm_params->input_to_output_effective_bias))); - + CreateGateParams( + context, lstm_tensors.GetInternalTensor(kLstmInputTensor), + lstm_tensors.GetInternalTensor(kLstmInputToInputWeightsTensor), + lstm_tensors.GetInternalTensor(kLstmInputGateBiasTensor), + lstm_tensors.GetInternalTensor(kLstmOutputStateTensor), + lstm_tensors.GetInternalTensor(kLstmRecurrentToInputWeightsTensor), + /*hidden_state_bias=*/nullptr, nonlinear_input_scale, kTfLiteInt16, + op_data->input_gate_parameters)); TF_LITE_ENSURE_OK( context, - PrecomputeZeroPointTimesWeightWithBias( - context, output_state_zero_point, recurrent_to_output_weights, - nullptr, &(integer_lstm_params->recurrent_to_output_effective_bias))); - - // Input gate. The calculation is only meaningful for non-cifg case. - TfLiteTensor* input_gate_bias = is_layer_norm - ? nullptr - : micro_context->AllocateTempInputTensor( - node, kLstmInputGateBiasTensor); + CreateGateParams( + context, lstm_tensors.GetInternalTensor(kLstmInputTensor), + lstm_tensors.GetInternalTensor(kLstmInputToCellWeightsTensor), + lstm_tensors.GetInternalTensor(kLstmCellGateBiasTensor), + lstm_tensors.GetInternalTensor(kLstmOutputStateTensor), + lstm_tensors.GetInternalTensor(kLstmRecurrentToCellWeightsTensor), + /*hidden_state_bias=*/nullptr, nonlinear_input_scale, kTfLiteInt16, + op_data->cell_gate_parameters)); TF_LITE_ENSURE_OK( context, - PrecomputeZeroPointTimesWeightWithBias( - context, input_zero_point, input_to_input_weights, input_gate_bias, - &(integer_lstm_params->input_to_input_effective_bias))); - TF_LITE_ENSURE_OK( - context, - PrecomputeZeroPointTimesWeightWithBias( - context, output_state_zero_point, recurrent_to_input_weights, nullptr, - &(integer_lstm_params->recurrent_to_input_effective_bias))); + CreateGateParams( + context, lstm_tensors.GetInternalTensor(kLstmInputTensor), + lstm_tensors.GetInternalTensor(kLstmInputToOutputWeightsTensor), + lstm_tensors.GetInternalTensor(kLstmOutputGateBiasTensor), + lstm_tensors.GetInternalTensor(kLstmOutputStateTensor), + lstm_tensors.GetInternalTensor(kLstmRecurrentToOutputWeightsTensor), + /*hidden_state_bias=*/nullptr, nonlinear_input_scale, kTfLiteInt16, + op_data->output_gate_parameters)); + + // Inter gate multiplication parameters + float nonlinear_output_scale = 0.00003051757; // 2^-15 Q3.12 -> Q0.15 + float cell_state_scale = lstm_tensors.CellStateTensor()->params.scale; + // forget gate output (nonlinear output) x cell state -> cell state + op_data->inter_gate_parameters.forget_cell_mul_params = + CreateInterGateMulParams(nonlinear_output_scale, cell_state_scale, + cell_state_scale, kTfLiteInt16); + // input gate output x cell gate output -> cell state + op_data->inter_gate_parameters.input_mul_params = + CreateInterGateMulParams(nonlinear_output_scale, nonlinear_output_scale, + cell_state_scale, kTfLiteInt16); + // tanh output x output 
gate output -> hidden state (potentially asymmetric) + op_data->inter_gate_parameters.output_mul_params = CreateInterGateMulParams( + nonlinear_output_scale, nonlinear_output_scale, + lstm_tensors.HiddenStateTensor()->params.scale, + lstm_tensors.HiddenStateTensor()->type, + lstm_tensors.HiddenStateTensor()->params.zero_point); + return kTfLiteOk; +} - // Projection bias. The calculation is only meaningful for with projection. - TF_LITE_ENSURE_OK(context, - PrecomputeZeroPointTimesWeightWithBias( - context, hidden_zp, projection_weights, projection_bias, - &(integer_lstm_params->projection_effective_bias))); +LSTMKernelContents CreateLSTMKernelContent(TfLiteContext* context, + TfLiteNode* node) { + LSTMKernelContents kernel_content; + // Point to the correct tensors + for (size_t i = 0; i < 24; i++) { + kernel_content.internal_tensors[i] = + tflite::micro::GetMutableEvalInput(context, node, i); + } + // Output tensor + kernel_content.output_tensor = tflite::micro::GetEvalOutput(context, node, 0); + return kernel_content; +} - if (input != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input); - } - if (output_state != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output_state); - } - if (input_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_input_weights); - } - if (input_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_forget_weights); - } - if (input_to_cell_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_cell_weights); - } - if (input_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_output_weights); - } - if (recurrent_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_input_weights); - } - if (recurrent_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_forget_weights); - } - if (recurrent_to_cell_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_cell_weights); - } - if (recurrent_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_output_weights); - } - if (projection_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(projection_weights); - } - if (projection_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(projection_bias); - } - if (forget_gate_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(forget_gate_bias); - } - if (cell_gate_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_gate_bias); - } - if (output_gate_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output_gate_bias); - } - if (input_gate_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_gate_bias); - } +template <typename CellType> +LSTMBuffers<CellType> CreateLSTMBuffers(TfLiteContext* context, + const int* buffer_indices) { + LSTMBuffers<CellType> buffers; + buffers.buffer0 = reinterpret_cast<CellType*>( + context->GetScratchBuffer(context, buffer_indices[0])); + buffers.buffer1 = reinterpret_cast<CellType*>( + context->GetScratchBuffer(context, buffer_indices[1])); + buffers.buffer2 = reinterpret_cast<CellType*>( + context->GetScratchBuffer(context, buffer_indices[2])); + buffers.buffer3 = reinterpret_cast<CellType*>( + context->GetScratchBuffer(context, buffer_indices[3])); + return buffers; +} - if (intermediate != nullptr) { - micro_context->DeallocateTempTfLiteTensor(intermediate); - } - return kTfLiteOk; +/*Kernel functions*/ -void* UnidirectionalSequenceLstmInit(TfLiteContext* context, const char* buffer, - size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpDataLSTM)); } -// Resize the output and state tensors based on the sizes of the input tensors. -// Allocate a temporary scratch tensor. Also check that the sizes of the input -// tensors match each other. TfLiteStatus UnidirectionalSequenceLstmPrepare(TfLiteContext* context, TfLiteNode* node) { - UnidirectionalSequenceLstmOpData* op_data = - reinterpret_cast<UnidirectionalSequenceLstmOpData*>(node->user_data); - - MicroContext* micro_context = GetMicroContext(context); - - // Check we have all the inputs and outputs we need. - bool use_layer_norm = false; - if (node->inputs->size == 24) { - TfLiteTensor* forget_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmForgetLayerNormCoefficientsTensor); - if (forget_layer_norm_coefficients == nullptr) { - use_layer_norm = false; - } else { - use_layer_norm = true; - } - if (forget_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(forget_layer_norm_coefficients); - } - } else if (node->inputs->size == 20) { - // This is deprecated and is only kept here for backward compatibility. - use_layer_norm = false; - } else { - MicroPrintf("The LSTM Full kernel expects 20 or 24 inputs. Got %d inputs", - node->inputs->size); - return kTfLiteError; - } TF_LITE_ENSURE_EQ(context, node->outputs->size, 1); - op_data->use_layer_norm = use_layer_norm; + TF_LITE_ENSURE_EQ(context, node->inputs->size, 24); - // Inferring batch size, number of outputs and sequence length and - // number of cells from the input tensors. - TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kLstmInputTensor); - op_data->input_zero_point = input->params.zero_point; - const bool is_integer = input->type == kTfLiteInt8; - TF_LITE_ENSURE(context, input->dims->size > 1); - const auto* params = - reinterpret_cast<TfLiteUnidirectionalSequenceLSTMParams*>( - node->builtin_data); - const bool time_major = params->time_major; - const int n_batch = time_major ? input->dims->data[1] : input->dims->data[0]; - const int n_input = input->dims->data[2]; + TFLITE_DCHECK(node->builtin_data != nullptr); + TFLITE_DCHECK(node->user_data != nullptr); - TfLiteTensor* input_to_output_weights = - micro_context->AllocateTempInputTensor(node, - kLstmInputToOutputWeightsTensor); - const int n_cell = input_to_output_weights->dims->data[0]; - TF_LITE_ENSURE_EQ(context, input_to_output_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, input_to_output_weights->dims->data[1], n_input); - - TfLiteTensor* recurrent_to_output_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToOutputWeightsTensor); - TF_LITE_ENSURE_EQ(context, recurrent_to_output_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, recurrent_to_output_weights->dims->data[0], - n_cell); - const int n_output = recurrent_to_output_weights->dims->data[1]; - - // Check that input tensor dimensions matches with each other. + OpDataLSTM* op_data = reinterpret_cast<OpDataLSTM*>(node->user_data); + const auto* builtin_data = + static_cast<const TfLiteUnidirectionalSequenceLSTMParams*>(node->builtin_data); + // All TempTfLiteTensors will be deallocated through the destructor.
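The destructor comment reflects an RAII pattern over the temp tensor API: every AllocateTemp*Tensor call made in the LstmTensors constructor is matched by a DeallocateTempTfLiteTensor in its destructor, so validation code can early-return on any TF_LITE_ENSURE failure without leaking temp allocations. A reduced sketch of the idea, assuming only the MicroContext calls visible in this diff (the single-tensor wrapper name is illustrative):

// Reduced sketch: acquire a temp tensor on construction, release it
// unconditionally on scope exit, mirroring what LstmTensors does for all 24.
class ScopedTempTensor {
 public:
  ScopedTempTensor(MicroContext* ctx, TfLiteNode* node, int index)
      : ctx_(ctx), tensor_(ctx->AllocateTempInputTensor(node, index)) {}
  ~ScopedTempTensor() {
    if (tensor_ != nullptr) ctx_->DeallocateTempTfLiteTensor(tensor_);
  }
  TfLiteTensor* get() const { return tensor_; }

 private:
  MicroContext* ctx_;
  TfLiteTensor* tensor_;
};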
+ LstmTensors lstm_tensors(context, node); + TF_LITE_ENSURE_OK(context, lstm_tensors.ValidateTensorStatus(context)); + + op_data->cell_gate_nonlinear_type = builtin_data->activation; + op_data->size_info = + CreateLstmSizeInfo(builtin_data->time_major, + lstm_tensors.GetInternalTensor(kLstmInputTensor)->dims, + lstm_tensors.HiddenStateTensor()->dims); TF_LITE_ENSURE_OK( - context, CheckInputTensorDimensions(context, node, n_input, n_output, - n_cell, use_layer_norm, is_integer)); - - // Get the pointer to output, output_state and cell_state buffer tensors. - TfLiteTensor* output = - micro_context->AllocateTempOutputTensor(node, kLstmOutputTensor); - - TfLiteTensor* output_state = - micro_context->AllocateTempInputTensor(node, kLstmOutputStateTensor); - TF_LITE_ENSURE(context, output_state != nullptr); - TF_LITE_ENSURE(context, output_state->is_variable); - op_data->output_state_zero_point = output_state->params.zero_point; - TfLiteTensor* cell_state = - micro_context->AllocateTempInputTensor(node, kLstmCellStateTensor); - TF_LITE_ENSURE(context, cell_state != nullptr); - TF_LITE_ENSURE(context, cell_state->is_variable); - - // Check the shape of input state tensors. - // These tensor may be 1D or 2D. It's fine as long as the total size is - // correct. - TF_LITE_ENSURE_EQ(context, NumElements(output_state), n_batch * n_output); - TF_LITE_ENSURE_EQ(context, NumElements(cell_state), n_batch * n_cell); - - // Check the shape of output tensor against that of input tensor - TF_LITE_ENSURE_EQ(context, output->dims->size, 3); - TF_LITE_ENSURE_EQ(context, input->dims->data[0], output->dims->data[0]); - TF_LITE_ENSURE_EQ(context, input->dims->data[1], output->dims->data[1]); - TF_LITE_ENSURE_EQ(context, output->dims->data[2], n_output); - - if (is_integer) { - const int num_intermediate_tensors = node->intermediates->size; - TF_LITE_ENSURE(context, num_intermediate_tensors == 5); - } - - TfLiteTensor* input_to_input_weights = micro_context->AllocateTempInputTensor( - node, kLstmInputToInputWeightsTensor); - - const bool use_cifg = (input_to_input_weights == nullptr); - - // Create a primary scratch buffer for hybrid and float - // If is_integer, primary scratch buffer has a different size - if (!is_integer) { - int scratch_buffer_size[2]; - scratch_buffer_size[0] = n_batch; - - if (use_cifg) { - // Reserving space for Cell, Forget, Output gates - scratch_buffer_size[1] = n_cell * 3; - } else { - // Reserving space for Input, Cell, Forget, Output gates - scratch_buffer_size[1] = n_cell * 4; - } - - TF_LITE_ENSURE_OK(context, - context->RequestScratchBufferInArena( - context, - scratch_buffer_size[0] * scratch_buffer_size[1] * - TfLiteTypeGetSize(input->type), - &(op_data->scratch_index[kPrimaryScratchBuffer]))); - } - - if (is_integer) { - // Integer UnidirectionalSequenceLSTM prepare function for 8x8->16. - // This code path needs 5 intermediate tensors per Op. - // Populate quantization parameters. - PopulateQuantizedLstmParams8x8_16(context, node, - &op_data->integer_lstm_param); - // Allocate scratch buffer. Need 4 16-bit buffer with size n_batch * n_cell - // and 1 8-bit buffer with size n_batch * n_cell. For integer - // UnidirectionalSequenceLSTM, we do not need the extra 32-bit buffer. 
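In place of the five fixed-type buffers described in the removed comment above, the replacement Prepare (see the request loop further below) asks for four scratch buffers, each sized batch_size * state_dimension * sizeof(cell state type). A quick sizing sketch, assuming an int16 cell state as in the 8x8->16 configuration:

#include <cstddef>
#include <cstdint>

// Sketch: per-buffer scratch size for the new kernel, assuming an int16_t
// cell state (the only quantized cell type the new kernel supports).
constexpr size_t LstmScratchBytes(size_t batch_size, size_t state_dimension) {
  return batch_size * state_dimension * sizeof(int16_t);
}

// e.g. batch 1, state dimension 20 -> 40 bytes per buffer, 4 buffers total.
static_assert(LstmScratchBytes(1, 20) == 40, "sizing example");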
- for (int i = 0; i < 5; ++i) { - TfLiteType buffer_type = kTfLiteInt16; - - if (i == 4) { - buffer_type = kTfLiteInt8; - } - - TF_LITE_ENSURE_OK( - context, - context->RequestScratchBufferInArena( - context, n_batch * n_cell * TfLiteTypeGetSize(buffer_type), - &(op_data->scratch_index[i]))); - } - - // Populate precomputed zp * weight. - TF_LITE_ENSURE_OK(context, PopulatePrecomputedZPTimesWeightsWithBias( - context, op_data, node)); - } - - if (input != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input); - } - if (input_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_output_weights); - } - if (recurrent_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_output_weights); - } - if (output != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output); - } - if (output_state != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output_state); - } - if (cell_state != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_state); + context, ValidateTensorSize(context, lstm_tensors, op_data->size_info)); + + // Create cell state information and gate parameters (Fully Connected and Mul) + auto cell_state_type = + lstm_tensors.GetInternalTensor(kLstmCellStateTensor)->type; + if (cell_state_type == kTfLiteFloat32) { + op_data->cell_state_info = + CreateLstmCellStateInfoFloat(builtin_data->cell_clip); + TF_LITE_ENSURE_OK( + context, PrepareGateParametersFloat(context, lstm_tensors, op_data)); + } else if (cell_state_type == kTfLiteInt16) { + op_data->cell_state_info = CreateLstmCellStateInfo( + lstm_tensors.CellStateTensor()->params.scale, builtin_data->cell_clip); + TF_LITE_ENSURE_OK( + context, PrepareGateParametersInteger(context, lstm_tensors, op_data)); + } else { + MicroPrintf( + "Cell state type %s (%d) not supported. 
The quantized Unidirectional " + "Sequence LSTM Op only support int16 cell state", + TfLiteTypeGetName(cell_state_type), cell_state_type); + return kTfLiteError; } - - if (input_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_input_weights); + // request buffers (four buffers) + for (size_t i = 0; i < 4; i++) { + TF_LITE_ENSURE_OK(context, context->RequestScratchBufferInArena( + context, + op_data->size_info.batch_size * + op_data->size_info.state_dimension * + TfLiteTypeGetSize(cell_state_type), + &(op_data->buffer_indices[i]))); } return kTfLiteOk; } TfLiteStatus UnidirectionalSequenceLstmEval(TfLiteContext* context, TfLiteNode* node) { - TFLITE_DCHECK(context->GetScratchBuffer != nullptr); - - const auto* params = - reinterpret_cast( - node->builtin_data); - const UnidirectionalSequenceLstmOpData* op_data = - reinterpret_cast(node->user_data); - const bool use_layer_norm = op_data->use_layer_norm; - const bool time_major = params->time_major; - - const TfLiteEvalTensor* input = - tflite::micro::GetEvalInput(context, node, kLstmInputTensor); - - const TfLiteEvalTensor* input_to_input_weights = tflite::micro::GetEvalInput( - context, node, kLstmInputToInputWeightsTensor); - - const TfLiteEvalTensor* input_to_forget_weights = tflite::micro::GetEvalInput( - context, node, kLstmInputToForgetWeightsTensor); - - const TfLiteEvalTensor* input_to_cell_weights = - tflite::micro::GetEvalInput(context, node, kLstmInputToCellWeightsTensor); - - const TfLiteEvalTensor* input_to_output_weights = tflite::micro::GetEvalInput( - context, node, kLstmInputToOutputWeightsTensor); - - const TfLiteEvalTensor* recurrent_to_input_weights = - tflite::micro::GetEvalInput(context, node, - kLstmRecurrentToInputWeightsTensor); - - const TfLiteEvalTensor* recurrent_to_forget_weights = - tflite::micro::GetEvalInput(context, node, - kLstmRecurrentToForgetWeightsTensor); - - const TfLiteEvalTensor* recurrent_to_cell_weights = - tflite::micro::GetEvalInput(context, node, - kLstmRecurrentToCellWeightsTensor); - - const TfLiteEvalTensor* recurrent_to_output_weights = - tflite::micro::GetEvalInput(context, node, - kLstmRecurrentToOutputWeightsTensor); - - const TfLiteEvalTensor* cell_to_input_weights = - tflite::micro::GetEvalInput(context, node, kLstmCellToInputWeightsTensor); - - const TfLiteEvalTensor* cell_to_forget_weights = tflite::micro::GetEvalInput( - context, node, kLstmCellToForgetWeightsTensor); - - const TfLiteEvalTensor* cell_to_output_weights = tflite::micro::GetEvalInput( - context, node, kLstmCellToOutputWeightsTensor); - - const TfLiteEvalTensor* input_gate_bias = - tflite::micro::GetEvalInput(context, node, kLstmInputGateBiasTensor); - - const TfLiteEvalTensor* forget_gate_bias = - tflite::micro::GetEvalInput(context, node, kLstmForgetGateBiasTensor); + TFLITE_DCHECK(node->user_data != nullptr); + const OpDataLSTM& op_data = *reinterpret_cast(node->user_data); + auto kernel_content = CreateLSTMKernelContent(context, node); - const TfLiteEvalTensor* cell_gate_bias = - tflite::micro::GetEvalInput(context, node, kLstmCellGateBiasTensor); + const auto activation_type = + kernel_content.internal_tensors[kLstmInputTensor]->type; + const auto weight_type = + kernel_content.internal_tensors[kLstmInputToInputWeightsTensor]->type; - const TfLiteEvalTensor* output_gate_bias = - tflite::micro::GetEvalInput(context, node, kLstmOutputGateBiasTensor); - - const TfLiteEvalTensor* projection_weights = - tflite::micro::GetEvalInput(context, node, kLstmProjectionWeightsTensor); - - const 
TfLiteEvalTensor* projection_bias = - tflite::micro::GetEvalInput(context, node, kLstmProjectionBiasTensor); - - TfLiteEvalTensor* output_state = - tflite::micro::GetMutableEvalInput(context, node, kLstmOutputStateTensor); - - TfLiteEvalTensor* cell_state = - tflite::micro::GetMutableEvalInput(context, node, kLstmCellStateTensor); - - TFLITE_DCHECK(cell_state != nullptr); - - const TfLiteEvalTensor* input_layer_norm_coefficients = - use_layer_norm ? tflite::micro::GetEvalInput( - context, node, kLstmInputLayerNormCoefficientsTensor) - : nullptr; - const TfLiteEvalTensor* forget_layer_norm_coefficients = - use_layer_norm - ? tflite::micro::GetEvalInput(context, node, - kLstmForgetLayerNormCoefficientsTensor) - : nullptr; - const TfLiteEvalTensor* cell_layer_norm_coefficients = - use_layer_norm ? tflite::micro::GetEvalInput( - context, node, kLstmCellLayerNormCoefficientsTensor) - : nullptr; - const TfLiteEvalTensor* output_layer_norm_coefficients = - use_layer_norm - ? tflite::micro::GetEvalInput(context, node, - kLstmOutputLayerNormCoefficientsTensor) - : nullptr; - - TfLiteEvalTensor* output = - tflite::micro::GetEvalOutput(context, node, kLstmOutputTensor); - - // Copy out the LSTM specific params so they can be passed in the function. - TfLiteLSTMParams lstm_params; - lstm_params.activation = params->activation; - lstm_params.cell_clip = params->cell_clip; - lstm_params.proj_clip = params->proj_clip; - lstm_params.asymmetric_quantize_inputs = params->asymmetric_quantize_inputs; - - switch (input_to_output_weights->type) { + switch (activation_type) { case kTfLiteFloat32: { - // Index the scratch buffers pointers to the global scratch buffer. - return EvalFloatLstm( - input, input_to_input_weights, input_to_forget_weights, - input_to_cell_weights, input_to_output_weights, - recurrent_to_input_weights, recurrent_to_forget_weights, - recurrent_to_cell_weights, recurrent_to_output_weights, - cell_to_input_weights, cell_to_forget_weights, cell_to_output_weights, - input_layer_norm_coefficients, forget_layer_norm_coefficients, - cell_layer_norm_coefficients, output_layer_norm_coefficients, - /*aux_input=*/nullptr, - /*aux_input_to_input_weights=*/nullptr, - /*aux_input_to_forget_weights=*/nullptr, - /*aux_input_to_cell_weights=*/nullptr, - /*aux_input_to_output_weights=*/nullptr, input_gate_bias, - forget_gate_bias, cell_gate_bias, output_gate_bias, - projection_weights, projection_bias, &lstm_params, - /*forward_sequence=*/true, time_major, - /*output_offset=*/0, - reinterpret_cast(context->GetScratchBuffer( - context, op_data->scratch_index[kPrimaryScratchBuffer])), - output_state, cell_state, output); - } break; - case kTfLiteUInt8: + LSTMBuffers buffers = + CreateLSTMBuffers(context, op_data.buffer_indices); + EvalLstm(op_data, kernel_content, buffers); + break; + } case kTfLiteInt8: { - return EvalInteger8x8_16Lstm( - input, input_to_input_weights, input_to_forget_weights, - input_to_cell_weights, input_to_output_weights, - recurrent_to_input_weights, recurrent_to_forget_weights, - recurrent_to_cell_weights, recurrent_to_output_weights, - cell_to_input_weights, cell_to_forget_weights, cell_to_output_weights, - input_layer_norm_coefficients, forget_layer_norm_coefficients, - cell_layer_norm_coefficients, output_layer_norm_coefficients, - input_gate_bias, forget_gate_bias, cell_gate_bias, output_gate_bias, - projection_weights, projection_bias, &lstm_params, - /*forward_sequence=*/true, time_major, &op_data->integer_lstm_param, - op_data->output_state_zero_point, output_state, cell_state, 
-          output,
-          reinterpret_cast<int16_t*>(
-              context->GetScratchBuffer(context, op_data->scratch_index[0])),
-          reinterpret_cast<int16_t*>(
-              context->GetScratchBuffer(context, op_data->scratch_index[1])),
-          reinterpret_cast<int16_t*>(
-              context->GetScratchBuffer(context, op_data->scratch_index[2])),
-          reinterpret_cast<int16_t*>(
-              context->GetScratchBuffer(context, op_data->scratch_index[3])),
-          reinterpret_cast<int8_t*>(
-              context->GetScratchBuffer(context, op_data->scratch_index[4])),
-          nullptr);
-    } break;
-    default:
-      MicroPrintf("Type %s is not currently supported.",
-                  TfLiteTypeGetName(input_to_output_weights->type));
+      switch (weight_type) {
+        case kTfLiteInt8: {
+          // 8(activation)x8(weight)->16(cell) LSTM with 32 bits bias
+          LSTMBuffers<int16_t> buffers =
+              CreateLSTMBuffers<int16_t>(context, op_data.buffer_indices);
+          EvalLstm<int8_t, int8_t, int16_t, int32_t>(op_data, kernel_content,
+                                                     buffers);
+          break;
+        }
+        default: {
+          MicroPrintf("Filter type %s (%d) not supported.",
+                      TfLiteTypeGetName(weight_type), weight_type);
+          return kTfLiteError;
+        }
+      }
+      break;
+    }
+    case kTfLiteInt16: {
+      switch (weight_type) {
+        case kTfLiteInt8: {
+          // 16(activation)x8(weight)->16(cell) LSTM with 64 bits bias
+          LSTMBuffers<int16_t> buffers =
+              CreateLSTMBuffers<int16_t>(context, op_data.buffer_indices);
+          EvalLstm<int16_t, int8_t, int16_t, int64_t>(op_data, kernel_content,
+                                                      buffers);
+          break;
+        }
+        default: {
+          MicroPrintf("Filter type %s (%d) not supported.",
+                      TfLiteTypeGetName(weight_type), weight_type);
+          return kTfLiteError;
+        }
+      }
+      break;
+    }
+    default: {
+      MicroPrintf("Input type %s (%d) not supported.",
+                  TfLiteTypeGetName(activation_type), activation_type);
       return kTfLiteError;
+    }
   }
+  return kTfLiteOk;
 }

 }  // namespace

@@ -1383,5 +586,4 @@ TfLiteRegistration Register_UNIDIRECTIONAL_SEQUENCE_LSTM() {
                                    UnidirectionalSequenceLstmPrepare,
                                    UnidirectionalSequenceLstmEval);
 }
-
 }  // namespace tflite
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/micro_allocation_info.cc b/third_party/tflite-micro/tensorflow/lite/micro/micro_allocation_info.cc
index 96484330e..0160cb143 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/micro_allocation_info.cc
+++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_allocation_info.cc
@@ -1,4 +1,4 @@
-/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -204,6 +204,14 @@ TfLiteStatus AllocationInfoBuilder::InitializeAllocationInfo(
         (current->bytes != 0);
     if (offline_offsets) {
       current->offline_offset = offline_offsets[i];
+
+      // Mark offline planned variable tensors so they can get an offline
+      // offset and be handled offline.
+      if (subgraph->tensors()->Get(i)->is_variable() &&
+          current->offline_offset != kOnlinePlannedBuffer) {
+        current->needs_allocating = true;
+      }
+
     } else {
       current->offline_offset = kOnlinePlannedBuffer;
     }
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/micro_allocator.cc b/third_party/tflite-micro/tensorflow/lite/micro/micro_allocator.cc
index 3853df307..f78b53a28 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/micro_allocator.cc
+++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_allocator.cc
@@ -1,4 +1,4 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -490,15 +490,6 @@ TfLiteStatus MicroAllocator::FinishModelAllocation(
   TF_LITE_ENSURE_STATUS(AllocateScratchBufferHandles(
       scratch_buffer_handles, scratch_buffer_request_count_));

-  // Allocate buffers for variable tensors.
-  for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size();
-       subgraph_idx++) {
-    const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx);
-    TFLITE_DCHECK(subgraph != nullptr);
-    TF_LITE_ENSURE_STATUS(AllocateVariables(
-        subgraph, subgraph_allocations[subgraph_idx].tensors));
-  }
-
   // Plan all subgraphs and scratch buffers together.
   TF_LITE_ENSURE_STATUS(CommitStaticMemoryPlan(model, subgraph_allocations,
                                                *scratch_buffer_handles));
@@ -754,23 +745,27 @@ TfLiteStatus MicroAllocator::AllocateTfLiteEvalTensors(
   return kTfLiteOk;
 }

-TfLiteStatus MicroAllocator::AllocateVariables(const SubGraph* subgraph,
-                                               TfLiteEvalTensor* eval_tensors) {
+TfLiteStatus MicroAllocator::AllocateVariables(
+    const SubGraph* subgraph, TfLiteEvalTensor* eval_tensors,
+    const int32_t* offline_planner_offsets) {
   for (size_t i = 0; i < subgraph->tensors()->size(); ++i) {
     auto* tensor = subgraph->tensors()->Get(i);
     if (tensor->is_variable()) {
-      size_t buffer_size;
-      TF_LITE_ENSURE_STATUS(
-          TfLiteEvalTensorByteLength(&eval_tensors[i], &buffer_size));
+      if (offline_planner_offsets == nullptr ||
+          offline_planner_offsets[i] == kOnlinePlannedBuffer) {
+        size_t buffer_size;
+        TF_LITE_ENSURE_STATUS(
+            TfLiteEvalTensorByteLength(&eval_tensors[i], &buffer_size));

-      eval_tensors[i].data.data =
-          persistent_buffer_allocator_->AllocatePersistentBuffer(
-              buffer_size, MicroArenaBufferAlignment());
+        eval_tensors[i].data.data =
+            persistent_buffer_allocator_->AllocatePersistentBuffer(
+                buffer_size, MicroArenaBufferAlignment());

-      if (eval_tensors[i].data.data == nullptr) {
-        MicroPrintf("Failed to allocate variable tensor of size %d",
-                    buffer_size);
-        return kTfLiteError;
+        if (eval_tensors[i].data.data == nullptr) {
+          MicroPrintf("Failed to allocate variable tensor of size %d",
+                      buffer_size);
+          return kTfLiteError;
+        }
       }
     }
   }
@@ -819,6 +814,17 @@ TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(
   const int32_t* offline_planner_offsets = nullptr;
   TF_LITE_ENSURE_STATUS(
       builder.GetOfflinePlannedOffsets(&offline_planner_offsets));
+
+  // We allocate buffers for variable tensors here since the offline planner
+  // offsets are conveniently available here.
+  for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size();
+       subgraph_idx++) {
+    const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx);
+    TFLITE_DCHECK(subgraph != nullptr);
+    TF_LITE_ENSURE_STATUS(AllocateVariables(
+        subgraph, allocations[subgraph_idx].tensors, offline_planner_offsets));
+  }
+
   TF_LITE_ENSURE_STATUS(
       builder.InitializeAllocationInfo(offline_planner_offsets, allocations));

diff --git a/third_party/tflite-micro/tensorflow/lite/micro/micro_allocator.h b/third_party/tflite-micro/tensorflow/lite/micro/micro_allocator.h
index 5cd0e1893..c68c71359 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/micro_allocator.h
+++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_allocator.h
@@ -1,4 +1,4 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -247,9 +247,13 @@ class MicroAllocator {
   // for all tensor buffers.
   virtual TfLiteStatus AllocateTfLiteEvalTensors(
       const Model* model, SubgraphAllocations* subgraph_allocations);
+
   // Allocates persistent tensor buffers for variable tensors in the subgraph.
-  virtual TfLiteStatus AllocateVariables(const SubGraph* subgraph,
-                                         TfLiteEvalTensor* eval_tensors);
+  // Online and offline variable tensors are handled differently, hence the
+  // offline_planner_offsets parameter is needed.
+  virtual TfLiteStatus AllocateVariables(
+      const SubGraph* subgraph, TfLiteEvalTensor* eval_tensors,
+      const int32_t* offline_planner_offsets);

   // Allocate and return a persistent TfLiteTensor.
   // TODO(b/162311891): Drop this method when the interpreter has an API for
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/micro_mutable_op_resolver.h b/third_party/tflite-micro/tensorflow/lite/micro/micro_mutable_op_resolver.h
index a4d50c83a..7622d2110 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/micro_mutable_op_resolver.h
+++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_mutable_op_resolver.h
@@ -1,4 +1,4 @@
-/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -168,8 +168,7 @@ class MicroMutableOpResolver : public MicroOpResolver {
   }

   TfLiteStatus AddCeil() {
-    return AddBuiltin(BuiltinOperator_CEIL, tflite::ops::micro::Register_CEIL(),
-                      ParseCeil);
+    return AddBuiltin(BuiltinOperator_CEIL, Register_CEIL(), ParseCeil);
   }

   TfLiteStatus AddCircularBuffer() {
@@ -177,8 +176,7 @@ class MicroMutableOpResolver : public MicroOpResolver {
   }

   TfLiteStatus AddConcatenation() {
-    return AddBuiltin(BuiltinOperator_CONCATENATION,
-                      tflite::ops::micro::Register_CONCATENATION(),
+    return AddBuiltin(BuiltinOperator_CONCATENATION, Register_CONCATENATION(),
                       ParseConcatenation);
   }

@@ -227,8 +225,7 @@ class MicroMutableOpResolver : public MicroOpResolver {
   }

   TfLiteStatus AddEqual() {
-    return AddBuiltin(BuiltinOperator_EQUAL,
-                      tflite::ops::micro::Register_EQUAL(), ParseEqual);
+    return AddBuiltin(BuiltinOperator_EQUAL, Register_EQUAL(), ParseEqual);
   }

   TfLiteStatus AddEthosU() {
@@ -253,8 +250,7 @@ class MicroMutableOpResolver : public MicroOpResolver {
   }

   TfLiteStatus AddFloor() {
-    return AddBuiltin(BuiltinOperator_FLOOR,
-                      tflite::ops::micro::Register_FLOOR(), ParseFloor);
+    return AddBuiltin(BuiltinOperator_FLOOR, Register_FLOOR(), ParseFloor);
   }

   TfLiteStatus AddFloorDiv() {
@@ -284,13 +280,12 @@ class MicroMutableOpResolver : public MicroOpResolver {
   }

   TfLiteStatus AddGreater() {
-    return AddBuiltin(BuiltinOperator_GREATER,
-                      tflite::ops::micro::Register_GREATER(), ParseGreater);
+    return AddBuiltin(BuiltinOperator_GREATER, Register_GREATER(),
+                      ParseGreater);
   }

   TfLiteStatus AddGreaterEqual() {
-    return AddBuiltin(BuiltinOperator_GREATER_EQUAL,
-                      tflite::ops::micro::Register_GREATER_EQUAL(),
+    return AddBuiltin(BuiltinOperator_GREATER_EQUAL, Register_GREATER_EQUAL(),
                       ParseGreaterEqual);
   }

@@ -320,13 +315,11 @@ class MicroMutableOpResolver : public MicroOpResolver {
   }

   TfLiteStatus AddLess() {
-    return AddBuiltin(BuiltinOperator_LESS, tflite::ops::micro::Register_LESS(),
-                      ParseLess);
+    return AddBuiltin(BuiltinOperator_LESS, Register_LESS(), ParseLess);
   }

   TfLiteStatus AddLessEqual() {
-    return AddBuiltin(BuiltinOperator_LESS_EQUAL,
-                      tflite::ops::micro::Register_LESS_EQUAL(),
+    return AddBuiltin(BuiltinOperator_LESS_EQUAL, Register_LESS_EQUAL(),
                       ParseLessEqual);
   }
@@ -362,8 +355,8 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddMaximum() { - return AddBuiltin(BuiltinOperator_MAXIMUM, - tflite::ops::micro::Register_MAXIMUM(), ParseMaximum); + return AddBuiltin(BuiltinOperator_MAXIMUM, Register_MAXIMUM(), + ParseMaximum); } TfLiteStatus AddMaxPool2D( @@ -381,8 +374,8 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddMinimum() { - return AddBuiltin(BuiltinOperator_MINIMUM, - tflite::ops::micro::Register_MINIMUM(), ParseMinimum); + return AddBuiltin(BuiltinOperator_MINIMUM, Register_MINIMUM(), + ParseMinimum); } TfLiteStatus AddMul(const TfLiteRegistration& registration = Register_MUL()) { @@ -394,13 +387,12 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddNotEqual() { - return AddBuiltin(BuiltinOperator_NOT_EQUAL, - tflite::ops::micro::Register_NOT_EQUAL(), ParseNotEqual); + return AddBuiltin(BuiltinOperator_NOT_EQUAL, Register_NOT_EQUAL(), + ParseNotEqual); } TfLiteStatus AddPack() { - return AddBuiltin(BuiltinOperator_PACK, tflite::ops::micro::Register_PACK(), - ParsePack); + return AddBuiltin(BuiltinOperator_PACK, Register_PACK(), ParsePack); } TfLiteStatus AddPad(const TfLiteRegistration& registration = Register_PAD()) { @@ -505,8 +497,7 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddSplitV() { - return AddBuiltin(BuiltinOperator_SPLIT_V, - tflite::ops::micro::Register_SPLIT_V(), ParseSplitV); + return AddBuiltin(BuiltinOperator_SPLIT_V, Register_SPLIT_V(), ParseSplitV); } TfLiteStatus AddSqueeze() { @@ -531,8 +522,7 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddStridedSlice() { - return AddBuiltin(BuiltinOperator_STRIDED_SLICE, - tflite::ops::micro::Register_STRIDED_SLICE(), + return AddBuiltin(BuiltinOperator_STRIDED_SLICE, Register_STRIDED_SLICE(), ParseStridedSlice); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/micro_profiler.cc b/third_party/tflite-micro/tensorflow/lite/micro/micro_profiler.cc index 9f6fc74c9..e9eb5e549 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/micro_profiler.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_profiler.cc @@ -52,7 +52,7 @@ void MicroProfiler::Log() const { #if !defined(TF_LITE_STRIP_ERROR_STRINGS) for (int i = 0; i < num_events_; ++i) { uint32_t ticks = end_ticks_[i] - start_ticks_[i]; - MicroPrintf("%s took %u ticks (%d ms).", tags_[i], ticks, + MicroPrintf("%s took %" PRIu32 " ticks (%d ms).", tags_[i], ticks, TicksToMs(ticks)); } #endif @@ -63,7 +63,7 @@ void MicroProfiler::LogCsv() const { MicroPrintf("\"Event\",\"Tag\",\"Ticks\""); for (int i = 0; i < num_events_; ++i) { uint32_t ticks = end_ticks_[i] - start_ticks_[i]; - MicroPrintf("%d,%s,%u", i, tags_[i], ticks); + MicroPrintf("%d,%s,%" PRIu32, i, tags_[i], ticks); } #endif } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/micro_resource_variable.cc b/third_party/tflite-micro/tensorflow/lite/micro/micro_resource_variable.cc index e6cea845b..767e7d17d 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/micro_resource_variable.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_resource_variable.cc @@ -63,6 +63,7 @@ int MicroResourceVariables::CreateIdIfNoneFound(const char* container, resource_variables_[resource_id].shared_name = shared_name; resource_variables_[resource_id].resource_buffer = nullptr; resource_variables_[resource_id].bytes = 0; + resource_variables_[resource_id].default_value = 0; return resource_id; } @@ -96,9 +97,17 
@@ TfLiteStatus MicroResourceVariables::Allocate(int id, TfLiteContext* context,
       MicroPrintf("Failed to allocate resource buffer.");
       return kTfLiteError;
     }
-    // Zero out resource buffers by deafult. Buffers can be initialized to
-    // nonzero values using ASSIGN_VARIABLE.
-    memset(variable.resource_buffer, 0, variable.bytes);
+    // Set resource buffers to the zero_point by default. Buffers can be
+    // initialized to nonzero values using ASSIGN_VARIABLE.
+    // See comment #2 in b/269648474 for more details on why we use zero_point.
+    if (tensor->quantization.params != nullptr) {
+      auto* quantization_data = reinterpret_cast<TfLiteAffineQuantization*>(
+          tensor->quantization.params);
+      int8_t zero_point = quantization_data->zero_point[0].data[0];
+      variable.default_value = zero_point;
+    }
+    // TODO(b/269669735): Explain why we cast zero_point to int8 and memset.
+    memset(variable.resource_buffer, variable.default_value, variable.bytes);
   }

   return kTfLiteOk;
@@ -127,7 +136,8 @@ TfLiteStatus MicroResourceVariables::Assign(int id,
 TfLiteStatus MicroResourceVariables::ResetAll() {
   for (int i = 0; i < num_resource_variables_; i++) {
     MicroResourceVariable variable = resource_variables_[i];
-    memset(variable.resource_buffer, 0, variable.bytes);
+    // TODO(b/269669735): Explain why we cast zero_point to int8 and memset.
+    memset(variable.resource_buffer, variable.default_value, variable.bytes);
   }
   return kTfLiteOk;
 }
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/micro_resource_variable.h b/third_party/tflite-micro/tensorflow/lite/micro/micro_resource_variable.h
index e8df991c3..fb9917d47 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/micro_resource_variable.h
+++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_resource_variable.h
@@ -69,6 +69,8 @@ class MicroResourceVariables {
   // This is only for verifying read size.
   size_t bytes;
+
+  // Initialization default value.
+  int8_t default_value;
 };

 MicroResourceVariables(MicroResourceVariable* variables,
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/recording_micro_allocator.cc b/third_party/tflite-micro/tensorflow/lite/micro/recording_micro_allocator.cc
index f646d61a2..f41dba61d 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/recording_micro_allocator.cc
+++ b/third_party/tflite-micro/tensorflow/lite/micro/recording_micro_allocator.cc
@@ -1,4 +1,4 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
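The micro_resource_variable.cc change above replaces the unconditional zero fill with a fill using the tensor's quantization zero_point. The reason is that a quantized buffer holding its zero_point everywhere dequantizes to real-valued zeros, which is the intended reset state for a resource variable. A minimal numpy sketch of that identity (the scale and zero_point values here are invented for illustration and are not taken from any model in this patch):

import numpy as np

# Affine dequantization, as used by TFLite quantization:
#   real_value = scale * (quantized_value - zero_point)
scale, zero_point = 0.05, -3  # hypothetical int8 quantization parameters

# memset(buffer, zero_point, bytes) fills every int8 element with zero_point.
buffer = np.full(8, zero_point, dtype=np.int8)

# Dequantizing that buffer yields exactly 0.0 everywhere: the variable is
# reset to a real value of zero rather than to whatever raw 0x00 represents.
assert np.all(scale * (buffer.astype(np.float32) - zero_point) == 0.0)

# A plain zero fill would instead reset to scale * (0 - zero_point), which is
# nonzero whenever zero_point != 0 (0.15 with the parameters above).

This is why default_value is recorded per variable and reused in ResetAll().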
@@ -192,11 +192,12 @@ TfLiteStatus RecordingMicroAllocator::AllocateTfLiteEvalTensors( } TfLiteStatus RecordingMicroAllocator::AllocateVariables( - const SubGraph* subgraph, TfLiteEvalTensor* eval_tensors) { + const SubGraph* subgraph, TfLiteEvalTensor* eval_tensors, + const int32_t* offline_planner_offsets) { RecordedAllocation allocations = SnapshotAllocationUsage(); - TfLiteStatus status = - MicroAllocator::AllocateVariables(subgraph, eval_tensors); + TfLiteStatus status = MicroAllocator::AllocateVariables( + subgraph, eval_tensors, offline_planner_offsets); RecordAllocationUsage(allocations, recorded_tflite_tensor_variable_buffer_data_); diff --git a/third_party/tflite-micro/tensorflow/lite/micro/recording_micro_allocator.h b/third_party/tflite-micro/tensorflow/lite/micro/recording_micro_allocator.h index 3136fadea..b6f69264d 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/recording_micro_allocator.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/recording_micro_allocator.h @@ -1,4 +1,4 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -77,8 +77,9 @@ class RecordingMicroAllocator : public MicroAllocator { const Model* model, SubgraphAllocations* subgraph_allocations) override; TfLiteStatus AllocateTfLiteEvalTensors( const Model* model, SubgraphAllocations* subgraph_allocations) override; - TfLiteStatus AllocateVariables(const SubGraph* subgraph, - TfLiteEvalTensor* eval_tensors) override; + TfLiteStatus AllocateVariables( + const SubGraph* subgraph, TfLiteEvalTensor* eval_tensors, + const int32_t* offline_planner_offsets) override; // TODO(b/162311891): Once all kernels have been updated to the new API drop // this method. It is only used to record TfLiteTensor persistent allocations. 
TfLiteTensor* AllocatePersistentTfLiteTensorInternal() override;
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/BUILD b/third_party/tflite-micro/tensorflow/lite/micro/tools/BUILD
index c0046847f..10ea6f06a 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/tools/BUILD
+++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/BUILD
@@ -1,3 +1,5 @@
+load("@tflm_pip_deps//:requirements.bzl", "requirement")
+
 package(
     default_visibility = ["//:__subpackages__"],
     licenses = ["notice"],
@@ -22,6 +24,38 @@ py_binary(
     srcs = ["generate_cc_arrays.py"],
 )

+py_binary(
+    name = "requantize_flatbuffer",
+    srcs = [
+        "requantize_flatbuffer.py",
+        "requantize_flatbuffer_utils.py",
+    ],
+    srcs_version = "PY3",
+    deps = [
+        "//tensorflow/lite/python:schema_py",
+        "//tensorflow/lite/tools:flatbuffer_utils",
+        "@absl_py//absl:app",
+    ],
+)
+
+py_test(
+    name = "requantize_flatbuffer_test",
+    srcs = ["requantize_flatbuffer_test.py"],
+    main = "requantize_flatbuffer_test.py",
+    python_version = "PY3",
+    tags = [
+        "noasan",
+        "nomsan",  # Python doesn't like these symbols from interpreter_wrapper_pybind.so
+        "noubsan",
+    ],
+    deps = [
+        ":requantize_flatbuffer",
+        "//tensorflow/lite/micro/python/interpreter/src:tflm_runtime",
+        requirement("numpy"),
+        requirement("tensorflow-cpu"),
+    ],
+)
+
 cc_binary(
     name = "tflite_flatbuffer_align",
     srcs = [
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifimini.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifimini.sh
new file mode 100755
index 000000000..abfe651c6
--- /dev/null
+++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifimini.sh
@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+# Called with the following arguments:
+# 1 - (optional) TENSORFLOW_ROOT: path to the root of the TFLM tree (relative to the directory from where the script is called).
+# 2 - (optional) EXTERNAL_DIR: path to the external directory that contains external code
+
+set -e
+pwd
+
+TENSORFLOW_ROOT=${1}
+EXTERNAL_DIR=${2}
+
+source ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/ci_build/helper_functions.sh
+
+readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR}
+
+# TODO(b/143904317): downloading first to allow for parallel builds.
+readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile third_party_downloads TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR}
+
+readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile \
+  TARGET=xtensa \
+  TARGET_ARCH=hifimini \
+  OPTIMIZED_KERNEL_DIR=xtensa \
+  XTENSA_CORE=mini1m1m_RG \
+  TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \
+  EXTERNAL_DIR=${EXTERNAL_DIR} \
+  build -j$(nproc)
+
+readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile \
+  TARGET=xtensa \
+  TARGET_ARCH=hifimini \
+  OPTIMIZED_KERNEL_DIR=xtensa \
+  XTENSA_CORE=mini1m1m_RG \
+  TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \
+  EXTERNAL_DIR=${EXTERNAL_DIR} \
+  test -j$(nproc)
\ No newline at end of file
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver/templates/micro_mutable_op_resolver_test.cc.mako b/third_party/tflite-micro/tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver/templates/micro_mutable_op_resolver_test.cc.mako
index 68176c566..3de4ef406 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver/templates/micro_mutable_op_resolver_test.cc.mako
+++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver/templates/micro_mutable_op_resolver_test.cc.mako
@@ -75,7 +75,7 @@ void RunModel(const uint8_t* model,
   TfLiteTensor* output_tensor = interpreter.output(0);
   TF_LITE_MICRO_EXPECT_EQ(output_tensor->bytes, golden_size * sizeof(int8_t));
-  int8_t* output = GetTensorData<int8_t>(output_tensor);
+  int8_t* output = ::tflite::GetTensorData<int8_t>(output_tensor);
   for (uint32_t i = 0; i < golden_size; i++) {
     // TODO(b/205046520): Better understand why TfLite and TFLM can sometimes be
     // off by 1.
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/Makefile b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/Makefile
index 25c234d75..84dbe6049 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/Makefile
+++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/Makefile
@@ -267,8 +267,8 @@ endif
 # Kernel integration tests must be excluded on certain targets.
 MICRO_LITE_INTEGRATION_TESTS += $(shell find $(TENSORFLOW_ROOT)tensorflow/lite/micro/integration_tests -name Makefile.inc)
-MICRO_LITE_GEN_MUTABLE_OP_RESOLVER_TEST += $(shell find \
-$(TENSORFLOW_ROOT)tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver_test/person_detect -name Makefile.inc)
+MICRO_LITE_GEN_MUTABLE_OP_RESOLVER_TEST += \
+  $(wildcard $(TENSORFLOW_ROOT)tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver_test/person_detect/Makefile.inc)

 MICRO_LITE_BENCHMARKS := $(wildcard $(TENSORFLOW_ROOT)tensorflow/lite/micro/benchmarks/Makefile.inc)
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/person_detection_int8_vela_convert.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/person_detection_int8_vela_convert.sh
index da51d756a..665c6a62e 100755
--- a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/person_detection_int8_vela_convert.sh
+++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/person_detection_int8_vela_convert.sh
@@ -55,6 +55,8 @@ if [ ! -f ${CONVERTED_PERSON_MODEL_INT8} ]; then
     source $TEMPFILE/bin/activate
     python3 -m pip install --upgrade pip >&2
     pip install --upgrade cython >&2
+    pip install numpy==1.21.3 >&2 # Some types are removed in the latest numpy.
+    # Use an older version until the ethos-u-vela package is updated.
pip install --prefer-binary ethos-u-vela >&2 vela --accelerator-config=ethos-u55-256 ${DOWNLOADS_DIR}/../../../models/person_detect.tflite \ --output-dir ${MODEL_DIR} >&2 diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/xtensa.inc b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/xtensa.inc index 20ee1e4e0..e4d0106ea 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/xtensa.inc +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/xtensa.inc @@ -8,6 +8,8 @@ MICROLITE_CC_KERNEL_SRCS += \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/conv_vision.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/depthwise_conv_hifi.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/depthwise_conv_vision.cc \ + $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/fully_connected_common_xtensa.cc \ + $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/fully_connected_int8.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/fully_connected_vision.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/pad_vision.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/pooling_vision.cc \ @@ -16,7 +18,13 @@ MICROLITE_CC_KERNEL_SRCS += \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/softmax_int8_int16.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/softmax_vision.cc -ifeq ($(TARGET_ARCH), hifi5) +ifeq ($(TARGET_ARCH), hifimini) + # hifimini optimizations are implemented in the TFLM repository itself. + THIRD_PARTY_KERNEL_CC_SRCS += \ + $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/hifimini/svdf.cc \ + $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/hifimini/fully_connected.cc + +else ifeq ($(TARGET_ARCH), hifi5) DOWNLOAD_RESULT := $(shell $(MAKEFILE_DIR)/ext_libs/xtensa_download.sh ${DOWNLOADS_DIR} hifi5 $(TENSORFLOW_ROOT)) ifneq ($(DOWNLOAD_RESULT), SUCCESS) $(error Something went wrong with the xtensa download: $(DOWNLOAD_RESULT)) @@ -124,7 +132,7 @@ else ifeq ($(TARGET_ARCH), vision_p6) INCLUDES += \ -I$(NNLIB_PATH)/flk/include \ -I$(NNLIB_PATH)/kernels/include/ \ - -I$(NNLIB_PATH)/runtime/include/ + -I$(NNLIB_PATH)/runtime/include/ LDFLAGS += -lidma else @@ -141,4 +149,10 @@ THIRD_PARTY_KERNEL_CC_SRCS += \ THIRD_PARTY_CC_HDRS += \ $(shell find $(FFT_PATH)/hifi3_fft -name "*.h") +else ifeq ($(TARGET_ARCH), hifimini) +THIRD_PARTY_KERNEL_CC_SRCS += \ + $(shell find $(FFT_PATH)/hifi2_fft -name "*.c") + +THIRD_PARTY_CC_HDRS += \ + $(shell find $(FFT_PATH)/hifi2_fft -name "*.h") endif diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer.py b/third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer.py new file mode 100644 index 000000000..d32cd8973 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer.py @@ -0,0 +1,204 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# =============================================================================
+"""An experimental tool to requantize an int8-activation, int8-weight LSTM-based model to int16 activation, int8 weight.
+
+Steps:
+1. Convert the trained model to int8 using the TFLite converter. See https://www.tensorflow.org/lite/performance/post_training_quantization#full_integer_quantization
+2. Use this tool to requantize the int8 model to int16.
+3. Check that the requantized model matches your expectations (e.g., read the conversion printout, perform inference tests)
+
+The conversion process:
+1. Requantize the ops specified in _COMPLEX_OP_REQUANTIZE_REGISTRATION using the registered function. Bias type conversion (int32 to int64) only happens here.
+2. Requantize all non-constant tensors with int8 type to int16 (and fix the quantization parameters)
+
+Run:
+bazel build tensorflow/lite/micro/tools:requantize_flatbuffer
+bazel-bin/tensorflow/lite/micro/tools/requantize_flatbuffer --int8_model_path=".tflite file path" --save_path="save path"
+
+CAVEAT:
+1. Use this tool ONLY for models that contain the LSTM layer. All other models should use the standard tflite conversion process.
+2. This is an experimental tool. ALWAYS check if the converted model matches your expectations.
+3. Add the custom op requantization function for complex ops (e.g., convolution).
+4. We assume ops not in _COMPLEX_OP_REQUANTIZE_REGISTRATION only have activation tensors (i.e., no weights or biases). Check the quantized model performance if you add additional ops to _TESTED_SIMPLE_OPS.
+
+"""
+import os
+
+import numpy as np
+from absl import app
+from absl import flags
+from absl import logging
+
+from tflite_micro.tensorflow.lite.tools import flatbuffer_utils
+from tflite_micro.tensorflow.lite.micro.tools import requantize_flatbuffer_utils
+from tflite_micro.tensorflow.lite.python import schema_py_generated
+
+FLAGS = flags.FLAGS
+
+flags.DEFINE_string("int8_model_path",
+                    default=None,
+                    help="the int8 model path.")
+flags.DEFINE_string("save_path",
+                    default=None,
+                    help="path to save the requantized model.")
+
+# Key: BuiltinOperator (see tensorflow/lite/schema/schema.fbs)
+# Value: the requantize function defined in requantize_flatbuffer_utils.py
+_COMPLEX_OP_REQUANTIZE_REGISTRATION = {
+    schema_py_generated.BuiltinOperator.FULLY_CONNECTED:
+        requantize_flatbuffer_utils.requantize_fully_connected,
+    schema_py_generated.BuiltinOperator.UNIDIRECTIONAL_SEQUENCE_LSTM:
+        requantize_flatbuffer_utils.requantize_unidirectional_sequence_lstm,
+    schema_py_generated.BuiltinOperator.SOFTMAX:
+        requantize_flatbuffer_utils.requantize_softmax
+}
+
+# List of tested simple operators (no weights or biases, e.g., RESHAPE); see
+# tensorflow/lite/schema/schema.fbs for op code names
+_TESTED_SIMPLE_OPS = [
+    schema_py_generated.BuiltinOperator.RESHAPE,
+    schema_py_generated.BuiltinOperator.QUANTIZE,
+    schema_py_generated.BuiltinOperator.DEQUANTIZE
+]
+
+_SUPPORTED_OPS = set(
+    list(_COMPLEX_OP_REQUANTIZE_REGISTRATION.keys()) + _TESTED_SIMPLE_OPS)
+
+
+class Requantizer:
+  """Requantize an int8-activation model to int16."""
+
+  def __init__(self, int8_model):
+    """Initialize the int8 to int16 converter.
+
+    Args:
+      int8_model: flatbuffer python object
+    """
+    self.model = int8_model
+    self.remaining_tensors = set()
+    for subgraph in self.model.subgraphs:
+      for tensor in subgraph.tensors:
+        self.remaining_tensors.add(tensor)
+
+  @classmethod
+  def from_file(cls, model_path):
+    """Instantiates a converter from an int8 quantized .tflite filepath.
+
+    Args:
+      model_path: Filepath to the .tflite model
+
+    Returns:
+      A Requantizer instance
+    """
+    int8_model = flatbuffer_utils.read_model(model_path)
+    return Requantizer(int8_model)
+
+  @classmethod
+  def from_bytes(cls, model_bytes):
+    """Instantiates a converter from an int8 quantized .tflite bytearray.
+
+    Args:
+      model_bytes: Content of the .tflite model
+
+    Returns:
+      A Requantizer instance
+    """
+    int8_model = flatbuffer_utils.convert_bytearray_to_object(model_bytes)
+    return Requantizer(int8_model)
+
+  def _remove_tensor(self, tensor):
+    """Remove a tensor from the tensor pool."""
+    if tensor in self.remaining_tensors:
+      self.remaining_tensors.remove(tensor)
+
+  def _remove_op_tensors(self, tensors, op):
+    """Remove an operator's tensors from the tensor pool.
+
+    Args:
+      tensors: tensors in the subgraph
+      op: the operator
+    """
+    for id in op.inputs:
+      # -1 means unused tensor
+      if id != -1:
+        self._remove_tensor(tensors[id])
+    for id in op.outputs:
+      if id != -1:
+        self._remove_tensor(tensors[id])
+
+  def _convert_ops(self):
+    """Convert all ops registered in _COMPLEX_OP_REQUANTIZE_REGISTRATION from int8 to int16 (activation type)."""
+    op_codes = self.model.operatorCodes
+    for subgraph in self.model.subgraphs:
+      tensors = subgraph.tensors
+      for op in subgraph.operators:
+        op_code = op_codes[op.opcodeIndex].builtinCode
+        op_name = flatbuffer_utils.opcode_to_name(self.model, op.opcodeIndex)
+        if op_code not in _SUPPORTED_OPS:
+          raise RuntimeError(
+              f"Operator {op_name} is not supported. If the operator contains weights or biases, develop and register the corresponding requantize function in _COMPLEX_OP_REQUANTIZE_REGISTRATION. Otherwise, try adding the op code to _TESTED_SIMPLE_OPS and validate the requantized model."
+          )
+        if op_code in _COMPLEX_OP_REQUANTIZE_REGISTRATION:
+          logging.info(f"Convert operator {op_name}")
+          _COMPLEX_OP_REQUANTIZE_REGISTRATION[op_code](tensors,
+                                                       self.model.buffers, op)
+        self._remove_op_tensors(tensors, op)
+
+  def _change_tensor_activation_type(self):
+    """Change all remaining tensor types from int8 to int16."""
+    for subgraph in self.model.subgraphs:
+      for tensor in subgraph.tensors:
+        if ((tensor in self.remaining_tensors)
+            and (requantize_flatbuffer_utils.TENSOR_CODE_TYPE[tensor.type]
+                 == np.int8) and ("const" not in str(tensor.name))):
+          requantize_flatbuffer_utils.change_activation_tensor_8to16(tensor)
+          self._remove_tensor(tensor)
+
+  def requantize_8to16(self):
+    '''
+    The requantize process has two phases:
+    1. Go through the registered ops and perform the custom op transformation
+    2. Go through the rest of the tensors and convert int8 non-const tensors to int16
+    '''
+
+    logging.info("Reset Operators")
+    self._convert_ops()
+    logging.info("Set Remaining Activation Types")
+    self._change_tensor_activation_type()
+    logging.info("Remaining Tensors:")
+    for tensor in self.remaining_tensors:
+      logging.info(
+          f"{tensor.name}, tensor type {flatbuffer_utils.type_to_name(tensor.type)}"
+      )
+
+  def save_model(self, output_path):
+    """Save the requantized model to a specified location."""
+    flatbuffer_utils.write_model(self.model, output_path)
+
+  def model_bytearray(self):
+    """Get the flatbuffer bytearray."""
+    return flatbuffer_utils.convert_object_to_bytearray(self.model)
+
+
+def main(_):
+  if not os.path.exists(FLAGS.int8_model_path):
+    raise ValueError(
+        "Model file does not exist. Please check the .tflite model path.")
+  requantizer = Requantizer.from_file(FLAGS.int8_model_path)
+  requantizer.requantize_8to16()
+  requantizer.save_model(FLAGS.save_path)
+
+
+if __name__ == "__main__":
+  app.run(main)
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer_test.py b/third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer_test.py
new file mode 100644
index 000000000..3dae5a8a2
--- /dev/null
+++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer_test.py
@@ -0,0 +1,115 @@
+# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+import os
+
+import numpy as np
+import tensorflow as tf
+
+from tensorflow.python.framework import test_util
+from tensorflow.python.platform import test
+from tflite_micro.tensorflow.lite.micro.tools import requantize_flatbuffer
+from tflite_micro.tensorflow.lite.micro.python.interpreter.src import tflm_runtime
+from tflite_micro.tensorflow.lite.tools import flatbuffer_utils
+
+
+# TODO(b/248061370): replace the keras model creation process with flatbuffer manipulation to speed up the test
+def create_simple_fc_model():
+  '''Create a simple model with two fully connected (FC) layers.'''
+  model = tf.keras.models.Sequential([
+      tf.keras.layers.InputLayer(input_shape=(28, 28)),
+      tf.keras.layers.Flatten(),
+      tf.keras.layers.Dense(50, activation=tf.nn.relu),
+      tf.keras.layers.Dense(10, activation=tf.nn.softmax, name="output")
+  ])
+  fixed_input = tf.keras.layers.Input(shape=[28, 28],
+                                      batch_size=1,
+                                      dtype=model.inputs[0].dtype,
+                                      name="fixed_input")
+  fixed_output = model(fixed_input)
+  return tf.keras.models.Model(fixed_input, fixed_output)
+
+
+def representative_dataset_gen(num_samples=100):
+  np.random.seed(42)  # Seed the random number generator
+  for _ in range(num_samples):
+    yield [np.random.random((1, 28, 28)).astype(np.float32)]
+
+
+def convert_tfl_converter(keras_model,
+                          representative_dataset_gen,
+                          int16=False):
+  '''Convert and quantize the Keras model using the standard TFLite converter.'''
+  converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
+  converter.optimizations = [tf.lite.Optimize.DEFAULT]
+  converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
+  if int16:
+    converter.target_spec.supported_ops = [
+        tf.lite.OpsSet.
+        EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
+    ]
+  converter.representative_dataset = representative_dataset_gen
+  return converter.convert()
+
+
+def convert_8to16_requantizer(keras_model, representative_dataset_gen):
+  '''Convert and quantize the Keras model using the int8 to int16 conversion tool.'''
+  # Convert to int8 first
+  int8_model = convert_tfl_converter(keras_model,
+                                     representative_dataset_gen,
+                                     int16=False)
+  int8_model = flatbuffer_utils.convert_bytearray_to_object(int8_model)
+  # Use the tool to convert to int16
+  requantizer = requantize_flatbuffer.Requantizer(int8_model)
+  requantizer.requantize_8to16()
+  return flatbuffer_utils.convert_object_to_bytearray(requantizer.model)
+
+
+class SimpleFCModelTest(test_util.TensorFlowTestCase):
+
+  def testCompareWithStandardConversion(self):
+
+    def inference(tflm_interpreter, data_x):
+      tflm_interpreter.set_input(data_x, 0)
+      tflm_interpreter.invoke()
+      return tflm_interpreter.get_output(0)
+
+    keras_model = create_simple_fc_model(
+    )  # int16 fc is supported in the tflite converter
+    tfl_converted_int16_model = convert_tfl_converter(
+        keras_model, representative_dataset_gen, int16=True)
+    int8_converted_int16_model = convert_8to16_requantizer(
+        keras_model, representative_dataset_gen)
+
+    interpreter_tfl_converted = tflm_runtime.Interpreter.from_bytes(
+        tfl_converted_int16_model)
+    interpreter_tool_converted = tflm_runtime.Interpreter.from_bytes(
+        int8_converted_int16_model)
+
+    num_steps = 10
+    # Give the same (random) input to both interpreters to confirm that the
+    # outputs are similar.
+    for _ in range(0, num_steps):
+      data_x = np.random.random((1, 28, 28)).astype("float32")
+
+      tfl_converted_result = inference(interpreter_tfl_converted, data_x)[0]
+      tool_converted_result = inference(interpreter_tool_converted, data_x)[0]
+
+      max_diff = max(abs(tool_converted_result - tfl_converted_result))
+      self.assertLess(
+          max_diff, 1e-4
+      )  # cannot be identical since the int8 model loses some range information
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer_utils.py b/third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer_utils.py
new file mode 100644
index 000000000..f0af23fb1
--- /dev/null
+++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer_utils.py
@@ -0,0 +1,219 @@
+# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+import numpy as np
+from absl import logging
+from tflite_micro.tensorflow.lite.python.schema_py_generated import TensorType
+
+# Map flatbuffer tensor type code to numpy data type.
See Table TensorType in tensorflow/lite/schema/schema.fbs
+# TODO(b/269487423): use a common util function instead
+TENSOR_CODE_TYPE = {
+    TensorType.FLOAT32: np.float32,
+    TensorType.FLOAT16: np.float16,
+    TensorType.INT32: np.int32,
+    TensorType.UINT8: np.uint8,
+    TensorType.INT64: np.int64,
+    TensorType.STRING: np.string_,
+    TensorType.BOOL: np.bool_,
+    TensorType.INT16: np.int16,
+    TensorType.COMPLEX64: np.complex64,
+    TensorType.INT8: np.int8,
+    TensorType.FLOAT64: np.float64,
+    TensorType.COMPLEX128: np.complex128,
+    TensorType.UINT64: np.uint64,
+    TensorType.RESOURCE: "RESOURCE",
+    TensorType.VARIANT: "VARIANT",
+    TensorType.UINT32: np.uint32,
+    TensorType.UINT16: np.uint16,
+    TensorType.INT4: "INT4",
+}
+
+# TODO(b/269487423): use a common util function instead
+TENSOR_TYPE_CODE = dict((reversed(item) for item in TENSOR_CODE_TYPE.items()))
+
+
+def clip_range(vals, bit_width):
+  """Mimic integer calculation.
+
+  Clip the range of vals based on bit width.
+
+  e.g., clip_range([300], 8) = [127] since int8 has range [-128, 127]
+
+  Args:
+    vals (np.array): float representation of the integer values
+    bit_width (int): number of desired bits for vals
+
+  Returns:
+    np.array : clipped vals
+  """
+  # Numpy integer calculation does not saturate, so clip explicitly here
+  min_val = -2**(bit_width - 1)
+  max_val = 2**(bit_width - 1) - 1
+  if vals.max() > max_val or vals.min() < min_val:
+    logging.warning("Integer overflow!")
+  return np.clip(vals, min_val, max_val)
+
+
+def quantize_data(data, scale, zero_point=0, bit_width=8):
+  """Quantize the data to integer type with desired bit width.
+
+  The quantized data is represented using float since integer calculation in
+  numpy may differ from other implementations (e.g., no integer saturation
+  protection in numpy)
+
+  Args:
+    data (np.array): float data
+    scale (float): quantization scale of the data
+    zero_point (integer): quantization zero point of the data
+    bit_width (int): number of representative bits for vals
+
+  Returns:
+    np.array : quantized data in float but clipped range
+  """
+  vals = np.round(data / scale) + zero_point
+  return clip_range(vals, bit_width)
+
+
+def dequantize_data(quantized_data, scale, zero_point=0):
+  """Dequantize the data back to float using the given scale and zero point.
+
+  Args:
+    quantized_data (np.array): quantized data
+    scale (float): quantization scale of the data
+    zero_point (integer): quantization zero point of the data
+
+  Returns:
+    np.array : dequantized data
+  """
+  return scale * (quantized_data - zero_point)
+
+
+def change_quantization_settings_8to16(tensor):
+  """Change the quantization settings of the tensor from int8 to int16."""
+
+  if (tensor.quantization.quantizedDimension != 0):
+    raise RuntimeError(
+        "Only per-layer quantization is supported. 
Per-channel quantization is not supported."
+    )
+
+  scale = tensor.quantization.scale[0]
+  zero_point = tensor.quantization.zeroPoint[0]
+
+  # Set MAX_INT8 from 127 to 128 to compensate for the range precision loss due to int8 quantization
+  MIN_INT8, MAX_INT8 = -128, 128
+  # Narrow range (-min == max) is used for symmetrical quantization
+  MIN_INT16, MAX_INT16 = -32767, 32767
+
+  # Asymmetrically quantized: scale * (qmax - zero_point) = rmax
+  rmax = scale * (MAX_INT8 - zero_point)
+  rmin = scale * (MIN_INT8 - zero_point)
+  # Symmetrically quantized: scale * qmax = rmax
+  scale_16 = max(abs(rmax), abs(rmin)) / abs(MIN_INT16)
+  # Change scale: symmetrically quantized
+  tensor.quantization.scale = [scale_16]
+  tensor.quantization.zeroPoint = [0]
+
+
+def change_activation_tensor_8to16(tensor):
+  """Change the quantization settings of an activation tensor from int8 to int16."""
+  if tensor.type == TENSOR_TYPE_CODE[np.int8]:
+    change_quantization_settings_8to16(tensor)
+    tensor.type = TENSOR_TYPE_CODE[np.int16]
+    logging.info(f"Set {tensor.name} from int8 to int16")
+
+
+def set_bias_type_int64(buffers, input, weight, bias):
+  """Set the bias tensor quantization settings from int32 to int64.
+
+  Args:
+    buffers (list): buffers for the model
+    input (Tensor): the corresponding input tensor for the bias
+    weight (Tensor): the corresponding weight tensor for the bias
+    bias (Tensor): the bias tensor that needs to be modified
+  """
+  bias_buffer = buffers[bias.buffer]
+  bias_scale = bias.quantization.scale[0]
+  bias_zero_pt = bias.quantization.zeroPoint[0]
+  data = np.frombuffer(bias_buffer.data, dtype=np.int32)
+  dequantized_data = dequantize_data(data, bias_scale, bias_zero_pt)
+  bias_scale_int64 = (input.quantization.scale[0] *
+                      weight.quantization.scale[0])
+  bias_zero_pt_int64 = 0  # symmetrically quantized
+  int64_data = quantize_data(dequantized_data, bias_scale_int64,
+                             bias_zero_pt_int64, 64).astype(np.int64)
+  bias_buffer.data = int64_data.tobytes()
+
+  bias.type = TENSOR_TYPE_CODE[np.int64]
+  bias.quantization.scale = [bias_scale_int64]
+  bias.quantization.zeroPoint = [bias_zero_pt_int64]
+  logging.info(f"Set {bias.name} from int32 to int64")
+
+
+def requantize_fully_connected(tensors, buffers, op):
+  """Requantize the fully connected op from int8 to int16."""
+  # Indices are from tensorflow/lite/micro/kernels/fully_connected_common.cc
+  input_tensor = tensors[op.inputs[0]]
+  # weight stays the same, no change needed
+  weight_tensor = tensors[op.inputs[1]]
+  output_tensor = tensors[op.outputs[0]]
+
+  change_activation_tensor_8to16(input_tensor)
+  change_activation_tensor_8to16(output_tensor)
+  # if the bias does not exist, op.inputs[2] == -1
+  if op.inputs[2] != -1:
+    bias_tensor = tensors[op.inputs[2]]
+    set_bias_type_int64(buffers, input_tensor, weight_tensor, bias_tensor)
+
+
+def requantize_unidirectional_sequence_lstm(tensors, buffers, op):
+  """Requantize the unidirectional sequence LSTM op from int8 to int16."""
+  input_tensor = tensors[op.inputs[0]]
+  hidden_state_tensor = tensors[op.inputs[18]]
+  output_tensor = tensors[op.outputs[0]]
+
+  # Indices are from tensorflow/lite/micro/kernels/lstm_shared.h
+  input_weights_idx = [1, 2, 3, 4]
+  recurrent_weights_idx = [5, 6, 7, 8]
+  bias_idx = [12, 13, 14, 15]
+
+  change_activation_tensor_8to16(input_tensor)
+  change_activation_tensor_8to16(hidden_state_tensor)
+  change_activation_tensor_8to16(output_tensor)
+
+  for weight_id, bias_id in zip(input_weights_idx, bias_idx):
+    weight_tensor = tensors[op.inputs[weight_id]]
+    bias_tensor = tensors[op.inputs[bias_id]]
+    set_bias_type_int64(buffers, input_tensor, weight_tensor, bias_tensor)
+
+  # Recurrent weights (recurrent_weights_idx) stay int8 and have no
+  # associated biases, so nothing to requantize for them.
+
+
+def requantize_softmax(tensors, buffers, op):
+  """Requantize the softmax op from int8 to int16."""
+  input_tensor = tensors[op.inputs[0]]
+  output_tensor = tensors[op.outputs[0]]
+
+  # Change input type
+  change_activation_tensor_8to16(input_tensor)
+
+  # Output range is always [0, 1]
+  if output_tensor.type == TENSOR_TYPE_CODE[np.int8]:
+    # change quantization settings
+    output_tensor.quantization.scale = [1 / 32768]
+    output_tensor.quantization.zeroPoint = [0]
+    # Set tensor type
+    output_tensor.type = TENSOR_TYPE_CODE[np.int16]
+    logging.info(f"Set {output_tensor.name} from int8 to int16")
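To make the arithmetic in requantize_flatbuffer_utils.py concrete, here is a small self-contained sketch of the two conversions it performs: the int8-to-int16 symmetric rescaling from change_quantization_settings_8to16 and the int32-to-int64 bias requantization from set_bias_type_int64. The formulas mirror the code above; the concrete scales, zero points, and bias values are invented for illustration only:

import numpy as np

# Hypothetical int8 activation quantization parameters.
scale_8, zero_point_8 = 0.02, 5

# int8 -> int16: recover the real range covered by the asymmetric int8 grid
# (with MAX_INT8 widened from 127 to 128, as in the code above)...
MIN_INT8, MAX_INT8 = -128, 128
rmax = scale_8 * (MAX_INT8 - zero_point_8)
rmin = scale_8 * (MIN_INT8 - zero_point_8)
# ...then fit it onto the symmetric, narrow-range int16 grid [-32767, 32767].
scale_16 = max(abs(rmax), abs(rmin)) / 32767
zero_point_16 = 0  # symmetric quantization

# Sanity check: a real value representable on the new grid survives the
# round trip to within half a quantization step.
x = 1.23
q16 = round(x / scale_16)
assert abs(scale_16 * q16 - x) <= scale_16 / 2

# Bias int32 -> int64: the bias scale is the product of the input and weight
# scales, so the bias is dequantized with the old product and requantized
# with the new one (the zero point stays 0 for symmetric quantization).
weight_scale = 0.004
old_bias_scale = scale_8 * weight_scale
bias_q32 = np.array([1200, -800], dtype=np.int32)
real_bias = old_bias_scale * bias_q32.astype(np.float64)
new_bias_scale = scale_16 * weight_scale
bias_q64 = np.round(real_bias / new_bias_scale).astype(np.int64)

This is the same math that set_bias_type_int64 applies buffer-wide via quantize_data and dequantize_data. Mirroring what main() in requantize_flatbuffer.py does, the requantizer can also be driven programmatically; the file names below are placeholders:

from tflite_micro.tensorflow.lite.micro.tools import requantize_flatbuffer

requantizer = requantize_flatbuffer.Requantizer.from_file("model_int8.tflite")
requantizer.requantize_8to16()
requantizer.save_model("model_int16.tflite")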