diff --git a/conf/tflite-micro.version b/conf/tflite-micro.version index 9874f722c..1224499cb 100644 --- a/conf/tflite-micro.version +++ b/conf/tflite-micro.version @@ -1 +1 @@ -8746ec9 +b489b99 diff --git a/third_party/tflite-micro/tensorflow/lite/builtin_ops.h b/third_party/tflite-micro/tensorflow/lite/builtin_ops.h index 337073080..f9871add2 100644 --- a/third_party/tflite-micro/tensorflow/lite/builtin_ops.h +++ b/third_party/tflite-micro/tensorflow/lite/builtin_ops.h @@ -186,6 +186,9 @@ typedef enum { kTfLiteBuiltinAtan2 = 156, kTfLiteBuiltinUnsortedSegmentMin = 157, kTfLiteBuiltinSign = 158, + kTfLiteBuiltinBitcast = 159, + kTfLiteBuiltinBitwiseXor = 160, + kTfLiteBuiltinRightShift = 161, } TfLiteBuiltinOperator; #ifdef __cplusplus diff --git a/third_party/tflite-micro/tensorflow/lite/c/builtin_op_data.h b/third_party/tflite-micro/tensorflow/lite/c/builtin_op_data.h index b1981b3c5..7628e5ad1 100644 --- a/third_party/tflite-micro/tensorflow/lite/c/builtin_op_data.h +++ b/third_party/tflite-micro/tensorflow/lite/c/builtin_op_data.h @@ -1,4 +1,4 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,8 +15,6 @@ limitations under the License. #ifndef TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_ #define TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_ -/// For documentation, see -/// third_party/tensorflow/lite/core/c/builtin_op_data.h. -#include "tensorflow/lite/core/c/builtin_op_data.h" // IWYU pragma: export +#include "tensorflow/lite/core/c/builtin_op_data.h" #endif // TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_ diff --git a/third_party/tflite-micro/tensorflow/lite/c/c_api_types.h b/third_party/tflite-micro/tensorflow/lite/c/c_api_types.h index 18bccde66..cdbf1fd32 100644 --- a/third_party/tflite-micro/tensorflow/lite/c/c_api_types.h +++ b/third_party/tflite-micro/tensorflow/lite/c/c_api_types.h @@ -1,4 +1,4 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -12,15 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ - -// This file declares types used by the pure C inference API defined in c_api.h, -// some of which are also used in the C++ and C kernel and interpreter APIs. - #ifndef TENSORFLOW_LITE_C_C_API_TYPES_H_ #define TENSORFLOW_LITE_C_C_API_TYPES_H_ -/// For documentation, see -/// third_party/tensorflow/lite/core/c/c_api_types.h. -#include "tensorflow/lite/core/c/c_api_types.h" // IWYU pragma: export +#include "tensorflow/lite/core/c/c_api_types.h" #endif // TENSORFLOW_LITE_C_C_API_TYPES_H_ diff --git a/third_party/tflite-micro/tensorflow/lite/c/common.h b/third_party/tflite-micro/tensorflow/lite/c/common.h index 718650df8..e3e8001cb 100644 --- a/third_party/tflite-micro/tensorflow/lite/c/common.h +++ b/third_party/tflite-micro/tensorflow/lite/c/common.h @@ -36,8 +36,6 @@ limitations under the License. #ifndef TENSORFLOW_LITE_C_COMMON_H_ #define TENSORFLOW_LITE_C_COMMON_H_ -/// For documentation, see -/// third_party/tensorflow/lite/core/c/common.h. 
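For reference, the three builtin codes added to TfLiteBuiltinOperator above can be dispatched on like any existing value. A minimal sketch (OpName is a hypothetical helper, not part of this change):

const char* OpName(TfLiteBuiltinOperator op) {
  switch (op) {
    case kTfLiteBuiltinBitcast:    return "BITCAST";      // 159
    case kTfLiteBuiltinBitwiseXor: return "BITWISE_XOR";  // 160
    case kTfLiteBuiltinRightShift: return "RIGHT_SHIFT";  // 161
    default:                       return "OTHER";
  }
}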
-#include "tensorflow/lite/core/c/common.h" // IWYU pragma: export +#include "tensorflow/lite/core/c/common.h" #endif // TENSORFLOW_LITE_C_COMMON_H_ diff --git a/third_party/tflite-micro/tensorflow/lite/core/api/flatbuffer_conversions.cc b/third_party/tflite-micro/tensorflow/lite/core/api/flatbuffer_conversions.cc index 2879afd8c..af0a0eb22 100644 --- a/third_party/tflite-micro/tensorflow/lite/core/api/flatbuffer_conversions.cc +++ b/third_party/tflite-micro/tensorflow/lite/core/api/flatbuffer_conversions.cc @@ -256,6 +256,10 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type, return ParseElu(op, error_reporter, allocator, builtin_data); } + case BuiltinOperator_EMBEDDING_LOOKUP: { + return ParseEmbeddingLookup(op, error_reporter, allocator, builtin_data); + } + case BuiltinOperator_EXP: { return ParseExp(op, error_reporter, allocator, builtin_data); } @@ -542,6 +546,14 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type, return ParseZerosLike(op, error_reporter, allocator, builtin_data); } + case BuiltinOperator_BITWISE_XOR: { + return ParseBitwiseXor(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_RIGHT_SHIFT: { + return ParseRightShift(op, error_reporter, allocator, builtin_data); + } + case BuiltinOperator_CAST: { return ParseCast(op, error_reporter, allocator, builtin_data); } @@ -845,6 +857,7 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type, *builtin_data = params.release(); return kTfLiteOk; } + // Below are the ops with no builtin_data structure. // TODO(aselle): Implement call in BuiltinOptions, but nullptrs are // ok for now, since there is no call implementation either. @@ -855,7 +868,6 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type, case BuiltinOperator_CUSTOM: case BuiltinOperator_DENSIFY: case BuiltinOperator_DYNAMIC_UPDATE_SLICE: - case BuiltinOperator_EMBEDDING_LOOKUP: case BuiltinOperator_EQUAL: case BuiltinOperator_HASHTABLE_FIND: case BuiltinOperator_HASHTABLE_IMPORT: @@ -885,6 +897,7 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type, case BuiltinOperator_UNSORTED_SEGMENT_SUM: case BuiltinOperator_ATAN2: case BuiltinOperator_SIGN: + case BuiltinOperator_BITCAST: case BuiltinOperator_WHERE: return kTfLiteOk; case BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES: @@ -1335,6 +1348,14 @@ TfLiteStatus ParseElu(const Operator*, ErrorReporter*, BuiltinDataAllocator*, return kTfLiteOk; } +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseEmbeddingLookup(const Operator*, ErrorReporter*, + BuiltinDataAllocator*, void**) { + return kTfLiteOk; +} + // We have this parse function instead of directly returning kTfLiteOk from the // switch-case in ParseOpData because this function is used as part of the // selective registration for the OpResolver implementation in micro. @@ -2441,6 +2462,22 @@ TfLiteStatus ParseZerosLike(const Operator*, ErrorReporter*, return kTfLiteOk; } +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. 
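ParseEmbeddingLookup above follows the same convention as the other option-less builtins: a named stub that always succeeds, so the micro op resolver can reference it for selective registration. The pattern, sketched for a hypothetical option-less operator:

// Hypothetical stub mirroring ParseEmbeddingLookup: there are no builtin
// options to read from the flatbuffer, so parsing always succeeds and
// builtin_data stays untouched.
TfLiteStatus ParseSomeOptionlessOp(const Operator*, ErrorReporter*,
                                   BuiltinDataAllocator*, void**) {
  return kTfLiteOk;
}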
+TfLiteStatus ParseBitwiseXor(const Operator*, ErrorReporter*, + BuiltinDataAllocator*, void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseRightShift(const Operator*, ErrorReporter*, + BuiltinDataAllocator*, void**) { + return kTfLiteOk; +} + TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type, ErrorReporter* error_reporter, BuiltinDataAllocator* allocator, void** builtin_data) { diff --git a/third_party/tflite-micro/tensorflow/lite/core/api/flatbuffer_conversions.h b/third_party/tflite-micro/tensorflow/lite/core/api/flatbuffer_conversions.h index 4df83d5ee..9ffe3971c 100644 --- a/third_party/tflite-micro/tensorflow/lite/core/api/flatbuffer_conversions.h +++ b/third_party/tflite-micro/tensorflow/lite/core/api/flatbuffer_conversions.h @@ -151,6 +151,11 @@ TfLiteStatus ParseDiv(const Operator* op, ErrorReporter* error_reporter, TfLiteStatus ParseElu(const Operator* op, ErrorReporter* error_reporter, BuiltinDataAllocator* allocator, void** builtin_data); +TfLiteStatus ParseEmbeddingLookup(const Operator* op, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + TfLiteStatus ParseEqual(const Operator* op, ErrorReporter* error_reporter, BuiltinDataAllocator* allocator, void** builtin_data); @@ -407,6 +412,14 @@ TfLiteStatus ParseZerosLike(const Operator* op, ErrorReporter* error_reporter, BuiltinDataAllocator* allocator, void** builtin_data); +TfLiteStatus ParseBitwiseXor(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseRightShift(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + } // namespace tflite #endif // TENSORFLOW_LITE_CORE_API_FLATBUFFER_CONVERSIONS_H_ diff --git a/third_party/tflite-micro/tensorflow/lite/core/c/common.cc b/third_party/tflite-micro/tensorflow/lite/core/c/common.cc index 827312b45..00bbcde28 100644 --- a/third_party/tflite-micro/tensorflow/lite/core/c/common.cc +++ b/third_party/tflite-micro/tensorflow/lite/core/c/common.cc @@ -219,11 +219,11 @@ TfLiteStatus TfLiteTensorCopy(const TfLiteTensor* src, TfLiteTensor* dst) { return kTfLiteOk; } -void TfLiteTensorResizeMaybeCopy(size_t num_bytes, TfLiteTensor* tensor, - bool preserve_data) { +TfLiteStatus TfLiteTensorResizeMaybeCopy(size_t num_bytes, TfLiteTensor* tensor, + bool preserve_data) { if (tensor->allocation_type != kTfLiteDynamic && tensor->allocation_type != kTfLitePersistentRo) { - return; + return kTfLiteOk; } #ifdef TF_LITE_TENSORFLOW_PROFILER tflite::PauseHeapMonitoring(/*pause=*/true); @@ -258,9 +258,15 @@ void TfLiteTensorResizeMaybeCopy(size_t num_bytes, TfLiteTensor* tensor, tflite::PauseHeapMonitoring(/*pause=*/false); #endif tensor->bytes = num_bytes; + if (tensor->data.data == nullptr && num_bytes != 0) { + // We are done allocating but tensor is pointing to null and a valid size + // was requested, so we error. 
+ return kTfLiteError; + } + return kTfLiteOk; } -void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor) { +TfLiteStatus TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor) { return TfLiteTensorResizeMaybeCopy(num_bytes, tensor, true); } #endif // TF_LITE_STATIC_MEMORY @@ -331,4 +337,18 @@ void TfLiteOpaqueDelegateDelete(TfLiteOpaqueDelegate* opaque_delegate) { delete tflite_delegate; } +void* TfLiteOpaqueDelegateGetData(const TfLiteOpaqueDelegate* delegate) { + if (!delegate) return nullptr; + + // The following cast is safe only because this code is part of the + // TF Lite runtime implementation. Apps using TF Lite should not rely on + // 'TfLiteOpaqueDelegate' and 'TfLiteDelegate' being equivalent. + const auto* tflite_delegate = + reinterpret_cast(delegate); + + if (!tflite_delegate->opaque_delegate_builder) return tflite_delegate->data_; + + return tflite_delegate->opaque_delegate_builder->data; +} + } // extern "C" diff --git a/third_party/tflite-micro/tensorflow/lite/core/c/common.h b/third_party/tflite-micro/tensorflow/lite/core/c/common.h index 46d5e650a..dcee532bb 100644 --- a/third_party/tflite-micro/tensorflow/lite/core/c/common.h +++ b/third_party/tflite-micro/tensorflow/lite/core/c/common.h @@ -42,6 +42,7 @@ limitations under the License. #ifndef TENSORFLOW_LITE_CORE_C_COMMON_H_ #define TENSORFLOW_LITE_CORE_C_COMMON_H_ +#include #include #include #include @@ -648,23 +649,26 @@ void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims, TfLiteStatus TfLiteTensorCopy(const TfLiteTensor* src, TfLiteTensor* dst); // Change the size of the memory block owned by `tensor` to `num_bytes`. -// Tensors with allocation types other than kTfLiteDynamic will be ignored. +// Tensors with allocation types other than `kTfLiteDynamic` will be ignored and +// a kTfLiteOk will be returned. // `tensor`'s internal data buffer will be assigned a pointer // which can safely be passed to free or realloc if `num_bytes` is zero. -// Behaviour is undefined if `tensor` is NULL. // If `preserve_data` is true, tensor data will be unchanged in the range from -// the start of the region up to the minimum of the old and new sizes. -void TfLiteTensorResizeMaybeCopy(size_t num_bytes, TfLiteTensor* tensor, - bool preserve_data); +// the start of the region up to the minimum of the old and new sizes. In the +// case of NULL tensor, or an error allocating new memory, returns +// `kTfLiteError`. +TfLiteStatus TfLiteTensorResizeMaybeCopy(size_t num_bytes, TfLiteTensor* tensor, + bool preserve_data); // Change the size of the memory block owned by `tensor` to `num_bytes`. -// Tensors with allocation types other than kTfLiteDynamic will be ignored. +// Tensors with allocation types other than kTfLiteDynamic will be ignored and +// a kTfLiteOk will be returned. // `tensor`'s internal data buffer will be assigned a pointer // which can safely be passed to free or realloc if `num_bytes` is zero. -// Behaviour is undefined if `tensor` is NULL. // Tensor data will be unchanged in the range from the start of the region up to -// the minimum of the old and new sizes. -void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor); +// the minimum of the old and new sizes. In the case +// of NULL tensor, or an error allocating new memory, returns `kTfLiteError`. +TfLiteStatus TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor); #endif // TF_LITE_STATIC_MEMORY // WARNING: This is an experimental interface that is subject to change. 
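With the signature change above, callers of the dynamic-tensor helpers can surface allocation failures instead of continuing with a null buffer. A sketch, assuming `tensor` is a caller-owned dynamic tensor:

TfLiteStatus GrowDynamicTensor(TfLiteTensor* tensor, size_t new_num_bytes) {
  // Only meaningful for kTfLiteDynamic / kTfLitePersistentRo tensors; other
  // allocation types are left untouched and the call still reports kTfLiteOk.
  if (TfLiteTensorResizeMaybeCopy(new_num_bytes, tensor,
                                  /*preserve_data=*/true) != kTfLiteOk) {
    return kTfLiteError;  // realloc returned null for a non-zero size
  }
  return kTfLiteOk;
}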
@@ -955,12 +959,53 @@ typedef struct TfLiteRegistration { // ops. We keep it inside of `TfLiteRegistration` and use it to route // callbacks properly. TfLiteRegistrationExternal* registration_external; + + // Retrieves asynchronous kernel. + // + // If the `async_kernel` field is nullptr, it means the operation described by + // this TfLiteRegistration object does not support asynchronous execution. + // Otherwise, the function that the field points to should only be called for + // delegate kernel nodes, i.e. `node` should be a delegate kernel node created + // by applying a delegate. + // If the function returns nullptr, that means that the underlying delegate + // does not support asynchronous execution for this `node`. + struct TfLiteAsyncKernel* (*async_kernel)(TfLiteContext* context, + TfLiteNode* node); } TfLiteRegistration; +/// \private +// Old version of `TfLiteRegistration` to maintain binary backward +// compatibility. +// The legacy registration type must be a POD struct type whose field types must +// be a prefix of the field types in TfLiteRegistration, and offset of the first +// field in TfLiteRegistration that is not present in the legacy registration +// type must be greater than or equal to the size of the legacy registration +// type. +// WARNING: This structure is deprecated / not an official part of the +// API. It should be only used for binary backward compatibility. +typedef struct TfLiteRegistration_V2 { + void* (*init)(TfLiteContext* context, const char* buffer, size_t length); + void (*free)(TfLiteContext* context, void* buffer); + TfLiteStatus (*prepare)(TfLiteContext* context, TfLiteNode* node); + TfLiteStatus (*invoke)(TfLiteContext* context, TfLiteNode* node); + const char* (*profiling_string)(const TfLiteContext* context, + const TfLiteNode* node); + int32_t builtin_code; + const char* custom_name; + int version; + TfLiteRegistrationExternal* registration_external; +} TfLiteRegistration_V2; + +/// \private // Old version of `TfLiteRegistration` to maintain binary backward // compatibility. -// WARNING: This structure is deprecated / not an official part of the API. -// It should be only used for binary backward compatibility. +// The legacy registration type must be a POD struct type whose field types must +// be a prefix of the field types in TfLiteRegistration, and offset of the first +// field in TfLiteRegistration that is not present in the legacy registration +// type must be greater than or equal to the size of the legacy registration +// type. +// WARNING: This structure is deprecated / not an official part of the +// API. It should be only used for binary backward compatibility. typedef struct TfLiteRegistration_V1 { void* (*init)(TfLiteContext* context, const char* buffer, size_t length); void (*free)(TfLiteContext* context, void* buffer); @@ -1135,6 +1180,20 @@ TfLiteOpaqueDelegate* TfLiteOpaqueDelegateCreate( // 'delegate' is a null pointer. void TfLiteOpaqueDelegateDelete(TfLiteOpaqueDelegate* delegate); +// Returns a pointer to the data associated with the provided opaque 'delegate'. +// +// A null pointer will be returned when: +// - The 'delegate' is null. +// - The 'data' field of the 'TfLiteOpaqueDelegateBuilder' used to construct the +// 'delegate' was null. +// - Or in case of any other error. +// - The 'delegate' has been constructed via a 'TfLiteOpaqueDelegateBuilder', +// but the 'data' field of the 'TfLiteOpaqueDelegateBuilder' is null. 
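In practice, a delegate kernel can use the new accessor to recover the state it was created with. A sketch (MyDelegateState is a hypothetical struct owned by the delegate author):

struct MyDelegateState { int num_delegated_nodes; };  // hypothetical

void InspectDelegate(const TfLiteOpaqueDelegate* opaque_delegate) {
  auto* state = static_cast<MyDelegateState*>(
      TfLiteOpaqueDelegateGetData(opaque_delegate));
  if (state == nullptr) {
    // No data was registered for this delegate, or an error occurred.
    return;
  }
  // ... use state->num_delegated_nodes ...
}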
+// +// The data_ field of 'delegate' will be returned if the +// 'opaque_delegate_builder' field is null. +void* TfLiteOpaqueDelegateGetData(const TfLiteOpaqueDelegate* delegate); + #ifdef __cplusplus } // extern "C" #endif // __cplusplus diff --git a/third_party/tflite-micro/tensorflow/lite/kernels/internal/common.h b/third_party/tflite-micro/tensorflow/lite/kernels/internal/common.h index c641bc94c..00fe01f7a 100644 --- a/third_party/tflite-micro/tensorflow/lite/kernels/internal/common.h +++ b/third_party/tflite-micro/tensorflow/lite/kernels/internal/common.h @@ -328,14 +328,16 @@ template int CountLeadingZeros(T integer_input) { static_assert(std::is_unsigned::value, "Only unsigned integer types handled."); -#if defined(__GNUC__) - return integer_input ? __builtin_clz(integer_input) - : std::numeric_limits::digits; -#else if (integer_input == 0) { return std::numeric_limits::digits; } - +#if defined(__GNUC__) + if (std::is_same::value) { + return __builtin_clz(integer_input); + } else if (std::is_same::value) { + return __builtin_clzll(integer_input); + } +#endif const T one_in_leading_positive = static_cast(1) << (std::numeric_limits::digits - 1); int leading_zeros = 0; @@ -344,7 +346,6 @@ int CountLeadingZeros(T integer_input) { ++leading_zeros; } return leading_zeros; -#endif } template @@ -377,40 +378,49 @@ inline Integer FloorLog2(Integer n) { } } -// The size of the LUT depends on the type of input. For uint8 and int8 inputs -// we use a 256 entries LUT to map all the values in the (u)int8 range. For -// int16 inputs the high 9 bits are used for indexing and the 7 remaining bits -// are used for interpolation. We thus use a 513-entries LUT for int16 cases, -// 512 for the 9-bit indexing and 1 extra entry to interpolate the last value. -template -constexpr int LUTSize() { - static_assert(std::is_same::value || - std::is_same::value || - std::is_same::value, - "Only LUTs with uint8, int8 or int16 inputs are supported."); - // As per c++11: constexpr methods cannot have more than one return statement. - return (std::is_same::value || std::is_same::value) - ? 256 - : 513; +namespace detail { + +// LUTPopulate takes an optional type-erased transform_params to allow passing +// extra parameters to the transform function pointer. const void* is used +// instead of std::function to be compatible with TFLite Micro +template +inline typename std::enable_if::value, + FloatT>::type +LUTTransform(Func transform, const void* /*transform_params*/, FloatT value) { + static_assert(std::is_floating_point::value, + "FloatT must be a floating-point type."); + return transform(value); +} + +template +inline typename std::enable_if< + std::is_same::value, FloatT>::type +LUTTransform(Func transform, const void* transform_params, FloatT value) { + static_assert(std::is_floating_point::value, + "FloatT must be a floating-point type."); + return transform(value, transform_params); } // Use the same LUT generation code for both uint8_t and int8_t. Int8_t indexes // will be directly casted to uint8_t, the int8 LUT will thus be ordered as [0, // 1, ..., 127, -128, ..., -2, -1] instead of [-128, -127, ..., -1, 0, 1, ..., // 126, 127]. 
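Concretely, a lookup with that layout reinterprets the signed index as unsigned; a sketch:

int8_t LookupInt8(const int8_t lut[256], int8_t input) {
  // The signed index is reinterpreted as unsigned, so input 0 reads entry 0
  // and input -1 reads entry 255, matching the ordering described above.
  return lut[static_cast<uint8_t>(input)];
}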
-template -inline typename std::enable_if::value || - std::is_same::value, - void>::type -LUTPopulate(float input_scale, int32_t input_zero_point, float output_scale, - int32_t output_zero_point, float (*transform)(float), T* lut) { +template +inline void LUTPopulateInt8(float input_scale, int32_t input_zero_point, + float output_scale, int32_t output_zero_point, + Func transform, const void* transform_params, + T* lut) { + static_assert( + std::is_same::value || std::is_same::value, + "T must be an uint8 or int8 type."); uint8_t* lut_uint8 = reinterpret_cast(lut); const float inverse_scale = 1 / output_scale; int32_t maxval = std::numeric_limits::max(); int32_t minval = std::numeric_limits::min(); for (int32_t val = minval; val <= maxval; ++val) { const float dequantized = input_scale * (val - input_zero_point); - const float transformed = transform(dequantized); + const float transformed = + LUTTransform(transform, transform_params, dequantized); const float rescaled = TfLiteRound(transformed * inverse_scale); const int32_t quantized = static_cast(rescaled + output_zero_point); @@ -421,10 +431,11 @@ LUTPopulate(float input_scale, int32_t input_zero_point, float output_scale, // Keep floating-point type configurable for backward compatibility. float // should be used for FloatT by default. -template -inline typename std::enable_if::value, void>::type -LUTPopulate(FloatT input_scale, int32_t input_zero_point, FloatT output_scale, - int32_t output_zero_point, FloatT (*transform)(FloatT), T* lut) { +template +inline void LUTPopulateInt16(FloatT input_scale, int32_t input_zero_point, + FloatT output_scale, int32_t output_zero_point, + Func transform, const void* transform_params, + int16_t* lut) { static_assert(std::is_floating_point::value, "FloatT must be a floating-point type."); const FloatT input_min = @@ -440,16 +451,21 @@ LUTPopulate(FloatT input_scale, int32_t input_zero_point, FloatT output_scale, const FloatT step = (input_max - input_min) / nb_steps; const FloatT half_step = step / 2; const FloatT output_scaling_inv = - static_cast(std::numeric_limits::max() - - std::numeric_limits::min() + 1) / + static_cast(std::numeric_limits::max() - + std::numeric_limits::min() + 1) / (output_max - output_min); - const FloatT table_min = static_cast(std::numeric_limits::min()); - const FloatT table_max = static_cast(std::numeric_limits::max()); + const FloatT table_min = + static_cast(std::numeric_limits::min()); + const FloatT table_max = + static_cast(std::numeric_limits::max()); for (int i = 0; i < nb_steps; i++) { - const FloatT val = transform(input_min + i * step); - const FloatT val_midpoint = transform(input_min + i * step + half_step); - const FloatT val_next = transform(input_min + (i + 1) * step); + const FloatT val = + LUTTransform(transform, transform_params, input_min + i * step); + const FloatT val_midpoint = LUTTransform( + transform, transform_params, input_min + i * step + half_step); + const FloatT val_next = LUTTransform(transform, transform_params, + input_min + (i + 1) * step); const FloatT sample_val = TfLiteRound(val * output_scaling_inv); const FloatT midpoint_interp_val = @@ -460,54 +476,84 @@ LUTPopulate(FloatT input_scale, int32_t input_zero_point, FloatT output_scale, const FloatT midpoint_err = midpoint_interp_val - midpoint_val; const FloatT bias = TfLiteRound(midpoint_err / 2); - lut[i] = static_cast(std::min( + lut[i] = static_cast(std::min( std::max(sample_val - bias, table_min), table_max)); } - lut[nb_steps] = static_cast(std::min( - 
std::max(TfLiteRound(transform(input_max) * output_scaling_inv), + lut[nb_steps] = static_cast(std::min( + std::max(TfLiteRound(LUTTransform( + transform, transform_params, input_max) * + output_scaling_inv), table_min), table_max)); } +} // namespace detail + +template +inline typename std::enable_if::value || + std::is_same::value, + void>::type +LUTPopulate(float input_scale, int32_t input_zero_point, float output_scale, + int32_t output_zero_point, float (*transform)(float), T* lut) { + detail::LUTPopulateInt8(input_scale, input_zero_point, output_scale, + output_zero_point, transform, nullptr, lut); +} + +template +inline typename std::enable_if::value || + std::is_same::value, + void>::type +LUTPopulate(float input_scale, int32_t input_zero_point, float output_scale, + int32_t output_zero_point, float (*transform)(float, const void*), + const void* transform_params, T* lut) { + detail::LUTPopulateInt8(input_scale, input_zero_point, output_scale, + output_zero_point, transform, transform_params, lut); +} + template inline typename std::enable_if::value, void>::type LUTPopulate(float input_scale, int32_t input_zero_point, float output_scale, int32_t output_zero_point, float (*transform)(float), T* lut) { - LUTPopulate(input_scale, input_zero_point, output_scale, - output_zero_point, transform, lut); + detail::LUTPopulateInt16(input_scale, input_zero_point, output_scale, + output_zero_point, transform, nullptr, lut); +} + +template +inline typename std::enable_if::value, void>::type +LUTPopulate(float input_scale, int32_t input_zero_point, float output_scale, + int32_t output_zero_point, float (*transform)(float, const void*), + const void* transform_params, T* lut) { + detail::LUTPopulateInt16(input_scale, input_zero_point, output_scale, + output_zero_point, transform, + transform_params, lut); } -// Deprecated and will be removed in future, please use LUTPopulate instead -template -inline void gen_lut(FloatT (*func)(FloatT), FloatT input_min, FloatT input_max, - FloatT output_min, FloatT output_max, LutOutT* lut) { - static_assert(std::is_same::value, - "Input and output type of the LUT must be the same."); - static_assert(std::is_same::value, - "Only int16_t type LUT are supported."); - static_assert(std::is_same::value, - "Only float type is supported for FloatT."); - using T = LutInT; - - const auto zero_point = [](float min, float max, float scale) { - // Symmetric int16 LUT, we know the zero-point will not overflow an int32_t - // and zero-point from min will be the same as from max. - return static_cast( - static_cast(std::numeric_limits::min()) - min / scale); - }; - - const float scale = static_cast(std::numeric_limits::max() - - std::numeric_limits::min()); - const float input_scale = (input_max - input_min) / scale; - const FloatT output_scale = (output_max - output_min) / scale; - const int32_t input_zero_point = - zero_point(input_min, input_max, input_scale); - const int32_t output_zero_point = - zero_point(output_min, output_max, output_scale); - - return LUTPopulate(input_scale, input_zero_point, output_scale, - output_zero_point, func, lut); +// Deprecated, avoid usage and prefer the float version. Kept for +// backward-compatiblity. 
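New call sites should prefer the float overloads above; the parameterized form lets a transform read extra state through the type-erased pointer. A sketch (LeakyRelu and its parameter struct are illustrative, not part of this change):

struct LeakyReluParams { float alpha; };  // hypothetical transform parameters

float LeakyRelu(float x, const void* params) {
  const float alpha = static_cast<const LeakyReluParams*>(params)->alpha;
  return x >= 0.f ? x : alpha * x;
}

void PopulateLeakyReluLut(int16_t* lut /* 513 entries, LUTSize<int16_t>() */) {
  LeakyReluParams leaky{0.1f};
  tflite::LUTPopulate<int16_t>(/*input_scale=*/0.05f, /*input_zero_point=*/0,
                               /*output_scale=*/0.05f, /*output_zero_point=*/0,
                               LeakyRelu, &leaky, lut);
}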
+template +inline typename std::enable_if::value, void>::type +LUTPopulate(double input_scale, int32_t input_zero_point, double output_scale, + int32_t output_zero_point, double (*transform)(double), T* lut) { + detail::LUTPopulateInt16(input_scale, input_zero_point, output_scale, + output_zero_point, transform, nullptr, lut); +} + +// The size of the LUT depends on the type of input. For uint8 and int8 inputs a +// simple 256 entries LUT is used. For int16 inputs the high 9 bits are used for +// indexing and the 7 remaining bits are used for interpolation. We thus use a +// 513-entries LUT for int16 cases, 512 for the 9-bit indexing and 1 extra entry +// to interpolate the last value. +template +constexpr int LUTSize() { + static_assert(std::is_same::value || + std::is_same::value || + std::is_same::value, + "Only LUTs with uint8, int8 or int16 inputs are supported."); + // As per c++11: constexpr methods cannot have more than one return statement. + return (std::is_same::value || std::is_same::value) + ? 256 + : 513; } // int16_t -> int16_t table lookup with interpolation diff --git a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/add.h b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/add.h index 1f5213167..b89a57b96 100644 --- a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/add.h +++ b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/add.h @@ -194,18 +194,20 @@ inline void Add(const ArithmeticParams& params, } } -template -inline typename std::enable_if::value, void>::type -BroadcastAdd4DSlow(const ArithmeticParams& params, +template +inline typename std::enable_if::value || dummy, void>::type +BroadcastAdd6DSlow(const ArithmeticParams& params, const RuntimeShape& input1_shape, const T* input1_data, const RuntimeShape& input2_shape, const T* input2_data, const RuntimeShape& output_shape, T* output_data) { - NdArrayDesc<4> desc1; - NdArrayDesc<4> desc2; + NdArrayDesc<6> desc1; + NdArrayDesc<6> desc2; NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2); const RuntimeShape extended_output_shape = - RuntimeShape::ExtendedShape(4, output_shape); + RuntimeShape::ExtendedShape(6, output_shape); T activation_min, activation_max; GetActivationParams(params, &activation_min, &activation_max); @@ -221,18 +223,64 @@ BroadcastAdd4DSlow(const ArithmeticParams& params, // We name our variables by their Tensorflow convention, but generate C code // nesting loops such that the innermost loop has the smallest stride for the // best cache behavior. 
- for (int b = 0; b < extended_output_shape.Dims(0); ++b) { - for (int y = 0; y < extended_output_shape.Dims(1); ++y) { - for (int x = 0; x < extended_output_shape.Dims(2); ++x) { - for (int c = 0; c < extended_output_shape.Dims(3); ++c) { - output_data[Offset(extended_output_shape, b, y, x, c)] = - ActivationFunctionWithMinMax( - input1_data[SubscriptToIndex(desc1, b, y, x, c)] + - input2_data[SubscriptToIndex(desc2, b, y, x, c)], + size_t input1_offset_a = 0; + size_t input2_offset_a = 0; + size_t output_offset_a = 0; + for (int a = 0; a < extended_output_shape.Dims(0); ++a) { + size_t input1_offset_d = input1_offset_a; + size_t input2_offset_d = input2_offset_a; + size_t output_offset_d = output_offset_a; + for (int d = 0; d < extended_output_shape.Dims(1); ++d) { + size_t input1_offset_b = input1_offset_d; + size_t input2_offset_b = input2_offset_d; + size_t output_offset_b = output_offset_d; + for (int b = 0; b < extended_output_shape.Dims(2); ++b) { + size_t input1_offset_y = input1_offset_b; + size_t input2_offset_y = input2_offset_b; + size_t output_offset_y = output_offset_b; + for (int y = 0; y < extended_output_shape.Dims(3); ++y) { + size_t input1_offset_x = input1_offset_y; + size_t input2_offset_x = input2_offset_y; + size_t output_offset_x = output_offset_y; + for (int x = 0; x < extended_output_shape.Dims(4); ++x) { + size_t input1_offset_c = input1_offset_x; + size_t input2_offset_c = input2_offset_x; + size_t output_offset_c = output_offset_x; + for (int c = 0; c < extended_output_shape.Dims(5); ++c) { + output_data[output_offset_c] = ActivationFunctionWithMinMax( + input1_data[input1_offset_c] + input2_data[input2_offset_c], activation_min, activation_max); + input1_offset_c += desc1.strides[5]; + input2_offset_c += desc2.strides[5]; + ++output_offset_c; + } + input1_offset_x += desc1.strides[4]; + input2_offset_x += desc2.strides[4]; + output_offset_x += extended_output_shape.Dims(5); + } + input1_offset_y += desc1.strides[3]; + input2_offset_y += desc2.strides[3]; + output_offset_y += + extended_output_shape.Dims(4) * extended_output_shape.Dims(5); } + input1_offset_b += desc1.strides[2]; + input2_offset_b += desc2.strides[2]; + output_offset_b += extended_output_shape.Dims(3) * + extended_output_shape.Dims(4) * + extended_output_shape.Dims(5); } + input1_offset_d += desc1.strides[1]; + input2_offset_d += desc2.strides[1]; + output_offset_d += + extended_output_shape.Dims(2) * extended_output_shape.Dims(3) * + extended_output_shape.Dims(4) * extended_output_shape.Dims(5); } + input1_offset_a += desc1.strides[0]; + input2_offset_a += desc2.strides[0]; + output_offset_a += + extended_output_shape.Dims(1) * extended_output_shape.Dims(2) * + extended_output_shape.Dims(3) * extended_output_shape.Dims(4) * + extended_output_shape.Dims(5); } } @@ -241,16 +289,16 @@ BroadcastAdd4DSlow(const ArithmeticParams& params, // choice of the shift (20 or 15, accordingly - see add.cc for more comments). 
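A usage sketch of the rewritten float path above (shapes are illustrative): broadcast dimensions of size 1 receive a zero stride from NdArrayDescsForElementwiseBroadcast, so the inner loops simply stop advancing that input's offset while the output offset keeps moving contiguously.

void AddBias(const float* input /* 2x2x3, viewed as [1,1,1,2,2,3] */,
             const float* bias  /* 3,     viewed as [1,1,1,1,1,3] */,
             float* output      /* 2x2x3 */) {
  tflite::ArithmeticParams params{};
  tflite::SetActivationParams(std::numeric_limits<float>::lowest(),
                              std::numeric_limits<float>::max(),
                              &params);  // needs <limits>
  tflite::reference_ops::BroadcastAdd6DSlow(
      params, tflite::RuntimeShape({1, 1, 1, 2, 2, 3}), input,
      tflite::RuntimeShape({1, 1, 1, 1, 1, 3}), bias,
      tflite::RuntimeShape({1, 1, 1, 2, 2, 3}), output);
}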
template inline typename std::enable_if::value, void>::type -BroadcastAdd4DSlow(const ArithmeticParams& params, +BroadcastAdd6DSlow(const ArithmeticParams& params, const RuntimeShape& input1_shape, const T* input1_data, const RuntimeShape& input2_shape, const T* input2_data, const RuntimeShape& output_shape, T* output_data) { - NdArrayDesc<4> desc1; - NdArrayDesc<4> desc2; + NdArrayDesc<6> desc1; + NdArrayDesc<6> desc2; NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2); const RuntimeShape extended_output_shape = - RuntimeShape::ExtendedShape(4, output_shape); + RuntimeShape::ExtendedShape(6, output_shape); // In Tensorflow, the dimensions are canonically named (batch_number, row, // col, channel), with extents (batches, height, width, depth), with the @@ -263,44 +311,98 @@ BroadcastAdd4DSlow(const ArithmeticParams& params, // We name our variables by their Tensorflow convention, but generate C code // nesting loops such that the innermost loop has the smallest stride for the // best cache behavior. - for (int b = 0; b < extended_output_shape.Dims(0); ++b) { - for (int y = 0; y < extended_output_shape.Dims(1); ++y) { - for (int x = 0; x < extended_output_shape.Dims(2); ++x) { - for (int c = 0; c < extended_output_shape.Dims(3); ++c) { - const int32_t input1_val = - params.input1_offset + - input1_data[SubscriptToIndex(desc1, b, y, x, c)]; - const int32_t input2_val = - params.input2_offset + - input2_data[SubscriptToIndex(desc2, b, y, x, c)]; - const int32_t shifted_input1_val = - input1_val * (1 << params.left_shift); - const int32_t shifted_input2_val = - input2_val * (1 << params.left_shift); - const int32_t scaled_input1_val = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_input1_val, params.input1_multiplier, - params.input1_shift); - const int32_t scaled_input2_val = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_input2_val, params.input2_multiplier, - params.input2_shift); - const int32_t raw_sum = scaled_input1_val + scaled_input2_val; - const int32_t raw_output = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - raw_sum, params.output_multiplier, params.output_shift) + - params.output_offset; - const int32_t clamped_output = - std::min(params.quantized_activation_max, - std::max(params.quantized_activation_min, raw_output)); - output_data[Offset(extended_output_shape, b, y, x, c)] = - static_cast(clamped_output); + size_t input1_offset_a = 0; + size_t input2_offset_a = 0; + size_t output_offset_a = 0; + for (int a = 0; a < extended_output_shape.Dims(0); ++a) { + size_t input1_offset_d = input1_offset_a; + size_t input2_offset_d = input2_offset_a; + size_t output_offset_d = output_offset_a; + for (int d = 0; d < extended_output_shape.Dims(1); ++d) { + size_t input1_offset_b = input1_offset_d; + size_t input2_offset_b = input2_offset_d; + size_t output_offset_b = output_offset_d; + for (int b = 0; b < extended_output_shape.Dims(2); ++b) { + size_t input1_offset_y = input1_offset_b; + size_t input2_offset_y = input2_offset_b; + size_t output_offset_y = output_offset_b; + for (int y = 0; y < extended_output_shape.Dims(3); ++y) { + size_t input1_offset_x = input1_offset_y; + size_t input2_offset_x = input2_offset_y; + size_t output_offset_x = output_offset_y; + for (int x = 0; x < extended_output_shape.Dims(4); ++x) { + size_t input1_offset_c = input1_offset_x; + size_t input2_offset_c = input2_offset_x; + size_t output_offset_c = output_offset_x; + for (int c = 0; c < extended_output_shape.Dims(5); ++c) { + const int32_t 
input1_val = + params.input1_offset + input1_data[input1_offset_c]; + const int32_t input2_val = + params.input2_offset + input2_data[input2_offset_c]; + const int32_t shifted_input1_val = + input1_val * (1 << params.left_shift); + const int32_t shifted_input2_val = + input2_val * (1 << params.left_shift); + const int32_t scaled_input1_val = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_input1_val, params.input1_multiplier, + params.input1_shift); + const int32_t scaled_input2_val = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_input2_val, params.input2_multiplier, + params.input2_shift); + const int32_t raw_sum = scaled_input1_val + scaled_input2_val; + const int32_t raw_output = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + raw_sum, params.output_multiplier, params.output_shift) + + params.output_offset; + const int32_t clamped_output = std::min( + params.quantized_activation_max, + std::max(params.quantized_activation_min, raw_output)); + output_data[output_offset_c] = static_cast(clamped_output); + input1_offset_c += desc1.strides[5]; + input2_offset_c += desc2.strides[5]; + ++output_offset_c; + } + input1_offset_x += desc1.strides[4]; + input2_offset_x += desc2.strides[4]; + output_offset_x += extended_output_shape.Dims(5); + } + input1_offset_y += desc1.strides[3]; + input2_offset_y += desc2.strides[3]; + output_offset_y += + extended_output_shape.Dims(4) * extended_output_shape.Dims(5); } + input1_offset_b += desc1.strides[2]; + input2_offset_b += desc2.strides[2]; + output_offset_b += extended_output_shape.Dims(3) * + extended_output_shape.Dims(4) * + extended_output_shape.Dims(5); } + input1_offset_d += desc1.strides[1]; + input2_offset_d += desc2.strides[1]; + output_offset_d += + extended_output_shape.Dims(2) * extended_output_shape.Dims(3) * + extended_output_shape.Dims(4) * extended_output_shape.Dims(5); } + input1_offset_a += desc1.strides[0]; + input2_offset_a += desc2.strides[0]; + output_offset_a += + extended_output_shape.Dims(1) * extended_output_shape.Dims(2) * + extended_output_shape.Dims(3) * extended_output_shape.Dims(4) * + extended_output_shape.Dims(5); } } +template +inline void BroadcastAdd4DSlow( + const ArithmeticParams& params, const RuntimeShape& input1_shape, + const T* input1_data, const RuntimeShape& input2_shape, + const T* input2_data, const RuntimeShape& output_shape, T* output_data) { + return BroadcastAdd6DSlow(params, input1_shape, input1_data, input2_shape, + input2_data, output_shape, output_data); +} + inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params, const RuntimeShape& unswitched_input1_shape, const uint8_t* unswitched_input1_data, diff --git a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/add.h b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/add.h index 8d9b318cc..579964dc3 100644 --- a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/add.h +++ b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/add.h @@ -35,30 +35,31 @@ inline void CheckArithmeticParams(const ArithmeticParams& params) { TFLITE_DCHECK_LE(-params.input2_offset, std::numeric_limits::max()); } -inline void ElementWise( - int size, const ArithmeticParams& params, const int8_t* input1_data, - const int8_t* input2_data, int8_t* output_data, - void (*check_arithmetic_params)(const ArithmeticParams&), - int8_t (*binary_func)(int8_t, int8_t, const ArithmeticParams&)) { +// TODO(b/270589088): move to a 
more appropriate file (b/270589088#comment2) +template +void ElementWise(int size, const ArithmeticParams& params, const T* input1_data, + const T* input2_data, T* output_data, + void (*check_arithmetic_params)(const ArithmeticParams&), + T (*binary_func)(T, T, const ArithmeticParams&)) { CheckArithmeticParams(params); for (int i = 0; i < size; ++i) { output_data[i] = binary_func(input1_data[i], input2_data[i], params); } } - -inline void BroadcastBinaryFunction4DSlow( +// TODO(b/270589088): move to a more appropriate file. (b/270589088#comment2) +template +void BroadcastBinaryFunction6DSlow( const ArithmeticParams& params, const RuntimeShape& input1_shape, - const int8_t* input1_data, const RuntimeShape& input2_shape, - const int8_t* input2_data, const RuntimeShape& output_shape, - int8_t* output_data, + const T* input1_data, const RuntimeShape& input2_shape, + const T* input2_data, const RuntimeShape& output_shape, T* output_data, void (*check_arithmetic_params)(const ArithmeticParams&), - int8_t (*binary_func)(int8_t, int8_t, const ArithmeticParams&)) { - NdArrayDesc<4> desc1; - NdArrayDesc<4> desc2; + T (*binary_func)(T, T, const ArithmeticParams&)) { + NdArrayDesc<6> desc1; + NdArrayDesc<6> desc2; NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2); const RuntimeShape extended_output_shape = - RuntimeShape::ExtendedShape(4, output_shape); + RuntimeShape::ExtendedShape(6, output_shape); // In Tensorflow, the dimensions are canonically named (batch_number, row, // col, channel), with extents (batches, height, width, depth), with the @@ -71,19 +72,79 @@ inline void BroadcastBinaryFunction4DSlow( // We name our variables by their Tensorflow convention, but generate C code // nesting loops such that the innermost loop has the smallest stride for the // best cache behavior. 
- for (int b = 0; b < extended_output_shape.Dims(0); ++b) { - for (int y = 0; y < extended_output_shape.Dims(1); ++y) { - for (int x = 0; x < extended_output_shape.Dims(2); ++x) { - for (int c = 0; c < extended_output_shape.Dims(3); ++c) { - output_data[Offset(extended_output_shape, b, y, x, c)] = binary_func( - input1_data[SubscriptToIndex(desc1, b, y, x, c)], - input2_data[SubscriptToIndex(desc2, b, y, x, c)], params); + size_t input1_offset_a = 0; + size_t input2_offset_a = 0; + size_t output_offset_a = 0; + for (int a = 0; a < extended_output_shape.Dims(0); ++a) { + size_t input1_offset_d = input1_offset_a; + size_t input2_offset_d = input2_offset_a; + size_t output_offset_d = output_offset_a; + for (int d = 0; d < extended_output_shape.Dims(1); ++d) { + size_t input1_offset_b = input1_offset_d; + size_t input2_offset_b = input2_offset_d; + size_t output_offset_b = output_offset_d; + for (int b = 0; b < extended_output_shape.Dims(2); ++b) { + size_t input1_offset_y = input1_offset_b; + size_t input2_offset_y = input2_offset_b; + size_t output_offset_y = output_offset_b; + for (int y = 0; y < extended_output_shape.Dims(3); ++y) { + size_t input1_offset_x = input1_offset_y; + size_t input2_offset_x = input2_offset_y; + size_t output_offset_x = output_offset_y; + for (int x = 0; x < extended_output_shape.Dims(4); ++x) { + size_t input1_offset_c = input1_offset_x; + size_t input2_offset_c = input2_offset_x; + size_t output_offset_c = output_offset_x; + for (int c = 0; c < extended_output_shape.Dims(5); ++c) { + output_data[output_offset_c] = + binary_func(input1_data[input1_offset_c], + input2_data[input2_offset_c], params); + input1_offset_c += desc1.strides[5]; + input2_offset_c += desc2.strides[5]; + ++output_offset_c; + } + input1_offset_x += desc1.strides[4]; + input2_offset_x += desc2.strides[4]; + output_offset_x += extended_output_shape.Dims(5); + } + input1_offset_y += desc1.strides[3]; + input2_offset_y += desc2.strides[3]; + output_offset_y += + extended_output_shape.Dims(4) * extended_output_shape.Dims(5); } + input1_offset_b += desc1.strides[2]; + input2_offset_b += desc2.strides[2]; + output_offset_b += extended_output_shape.Dims(3) * + extended_output_shape.Dims(4) * + extended_output_shape.Dims(5); } + input1_offset_d += desc1.strides[1]; + input2_offset_d += desc2.strides[1]; + output_offset_d += + extended_output_shape.Dims(2) * extended_output_shape.Dims(3) * + extended_output_shape.Dims(4) * extended_output_shape.Dims(5); } + input1_offset_a += desc1.strides[0]; + input2_offset_a += desc2.strides[0]; + output_offset_a += + extended_output_shape.Dims(1) * extended_output_shape.Dims(2) * + extended_output_shape.Dims(3) * extended_output_shape.Dims(4) * + extended_output_shape.Dims(5); } } +template +void BroadcastBinaryFunction4DSlow( + const ArithmeticParams& params, const RuntimeShape& input1_shape, + const T* input1_data, const RuntimeShape& input2_shape, + const T* input2_data, const RuntimeShape& output_shape, T* output_data, + void (*check_arithmetic_params)(const ArithmeticParams&), + T (*binary_func)(T, T, const ArithmeticParams&)) { + BroadcastBinaryFunction6DSlow(params, input1_shape, input1_data, input2_shape, + input2_data, output_shape, output_data, + check_arithmetic_params, binary_func); +} + inline int8_t AddFunc(int8_t x, int8_t y, const ArithmeticParams& params) { const int32_t input1_val = params.input1_offset + x; const int32_t input2_val = params.input2_offset + y; @@ -127,6 +188,18 @@ inline void Add(const ArithmeticParams& params, 
AddElementwise(flat_size, params, input1_data, input2_data, output_data); } +inline void BroadcastAdd6DSlow(const ArithmeticParams& params, + const RuntimeShape& input1_shape, + const int8_t* input1_data, + const RuntimeShape& input2_shape, + const int8_t* input2_data, + const RuntimeShape& output_shape, + int8_t* output_data) { + BroadcastBinaryFunction6DSlow(params, input1_shape, input1_data, input2_shape, + input2_data, output_shape, output_data, + CheckArithmeticParams, AddFunc); +} + inline void BroadcastAdd4DSlow(const ArithmeticParams& params, const RuntimeShape& input1_shape, const int8_t* input1_data, @@ -134,7 +207,7 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params, const int8_t* input2_data, const RuntimeShape& output_shape, int8_t* output_data) { - BroadcastBinaryFunction4DSlow(params, input1_shape, input1_data, input2_shape, + BroadcastBinaryFunction6DSlow(params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data, CheckArithmeticParams, AddFunc); } diff --git a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h index 5ddf04aea..ba3e2a81d 100644 --- a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h +++ b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h @@ -18,7 +18,6 @@ limitations under the License. #include #include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" namespace tflite { namespace reference_integer_ops { @@ -134,20 +133,6 @@ inline void ConvPerChannel( } } -inline void ConvPerChannelWithPackedInt4Weights( - const ConvParams& params, const int32_t* output_multiplier, - const int32_t* output_shift, const RuntimeShape& input_shape, - const int8_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_input, int8_t* unpacked_filter_data, - const RuntimeShape& bias_shape, const int32_t* bias_data, - const RuntimeShape& output_shape, int8_t* output_data) { - TFLITE_DCHECK(unpacked_filter_data != nullptr); - tflite::tensor_utils::UnpackDenseInt4IntoInt8( - filter_input, filter_shape.FlatSize(), unpacked_filter_data); - ConvPerChannel(params, output_multiplier, output_shift, input_shape, - input_data, filter_shape, unpacked_filter_data, bias_shape, - bias_data, output_shape, output_data); -} // Fixed-point per-channel-quantization convolution reference kernel. // 16-bit data and 8-bit filter diff --git a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h index 312ba0f93..7676fce0f 100644 --- a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h +++ b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h @@ -18,7 +18,6 @@ limitations under the License. 
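With ConvPerChannelWithPackedInt4Weights removed above (and its depthwise counterpart below), unpacking int4 weights becomes the caller's responsibility. A sketch of the equivalent sequence, reconstructed from the removed wrapper (the caller now includes portable_tensor_utils.h itself):

void ConvPerChannelWithInt4Unpack(
    const tflite::ConvParams& params, const int32_t* output_multiplier,
    const int32_t* output_shift, const tflite::RuntimeShape& input_shape,
    const int8_t* input_data, const tflite::RuntimeShape& filter_shape,
    const int8_t* packed_filter_data, int8_t* unpacked_filter_data,
    const tflite::RuntimeShape& bias_shape, const int32_t* bias_data,
    const tflite::RuntimeShape& output_shape, int8_t* output_data) {
  // Unpack into a caller-provided scratch buffer, then run the regular kernel.
  tflite::tensor_utils::UnpackDenseInt4IntoInt8(
      packed_filter_data, filter_shape.FlatSize(), unpacked_filter_data);
  tflite::reference_integer_ops::ConvPerChannel(
      params, output_multiplier, output_shift, input_shape, input_data,
      filter_shape, unpacked_filter_data, bias_shape, bias_data, output_shape,
      output_data);
}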
#include #include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" namespace tflite { namespace reference_integer_ops { @@ -122,21 +121,6 @@ inline void DepthwiseConvPerChannel( } } -inline void DepthwiseConvPerChannelWithPackedInt4Weights( - const DepthwiseParams& params, const int32_t* output_multiplier, - const int32_t* output_shift, const RuntimeShape& input_shape, - const int8_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, int8_t* unpacked_filter_data, - const RuntimeShape& bias_shape, const int32_t* bias_data, - const RuntimeShape& output_shape, int8_t* output_data) { - TFLITE_DCHECK_NE(unpacked_filter_data, nullptr); - tflite::tensor_utils::UnpackDenseInt4IntoInt8( - filter_data, filter_shape.FlatSize(), unpacked_filter_data); - DepthwiseConvPerChannel(params, output_multiplier, output_shift, input_shape, - input_data, filter_shape, unpacked_filter_data, - bias_shape, bias_data, output_shape, output_data); -} - inline void DepthwiseConvPerChannel( const DepthwiseParams& params, const int32_t* output_multiplier, const int32_t* output_shift, const RuntimeShape& input_shape, diff --git a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h index 77c766d25..3a74402ed 100644 --- a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h +++ b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h @@ -18,7 +18,6 @@ limitations under the License. #include #include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" namespace tflite { namespace reference_integer_ops { @@ -29,14 +28,15 @@ namespace reference_integer_ops { // zero_point (params.weights_offset) is always 0. // However, for per-tensor functions, params.weights_offset is still applied for // backward compatibility. 
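The per-channel kernel that follows is now a single template over input, weight, bias, and output types, so the former int8 and int16 overloads collapse into one definition chosen by argument deduction. A call-site sketch for the int16 path (buffer setup omitted):

void RunInt16FullyConnectedPerChannel(
    const tflite::FullyConnectedParams& params,
    const int32_t* per_channel_multiplier, const int* per_channel_shift,
    const tflite::RuntimeShape& input_shape, const int16_t* input_data,
    const tflite::RuntimeShape& filter_shape, const int8_t* filter_data,
    const tflite::RuntimeShape& bias_shape, const int64_t* bias_data,
    const tflite::RuntimeShape& output_shape, int16_t* output_data) {
  // InputType/WeightType/BiasType/OutputType are deduced from the pointers:
  // int16_t / int8_t / int64_t / int16_t.
  tflite::reference_integer_ops::FullyConnectedPerChannel(
      params, per_channel_multiplier, per_channel_shift, input_shape,
      input_data, filter_shape, filter_data, bias_shape, bias_data,
      output_shape, output_data);
}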
- -inline void FullyConnectedPerChannel( +template +void FullyConnectedPerChannel( const FullyConnectedParams& params, const int32_t* output_multiplier, const int* output_shift, const RuntimeShape& input_shape, - const int8_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, const RuntimeShape& bias_shape, - const int32_t* bias_data, const RuntimeShape& output_shape, - int8_t* output_data) { + const InputType* input_data, const RuntimeShape& filter_shape, + const WeightType* filter_data, const RuntimeShape& bias_shape, + const BiasType* bias_data, const RuntimeShape& output_shape, + OutputType* output_data) { const int32_t input_offset = params.input_offset; const int32_t output_offset = params.output_offset; const int32_t output_activation_min = params.quantized_activation_min; @@ -52,7 +52,7 @@ inline void FullyConnectedPerChannel( const int accum_depth = filter_shape.Dims(filter_dim_count - 1); for (int b = 0; b < batches; ++b) { for (int out_c = 0; out_c < output_depth; ++out_c) { - int32_t acc = 0; + BiasType acc = 0; for (int d = 0; d < accum_depth; ++d) { int32_t input_val = input_data[b * accum_depth + d]; int32_t filter_val = filter_data[out_c * accum_depth + d]; @@ -61,62 +61,26 @@ inline void FullyConnectedPerChannel( if (bias_data) { acc += bias_data[out_c]; } - acc = MultiplyByQuantizedMultiplier(acc, output_multiplier[out_c], - output_shift[out_c]); - acc += output_offset; - acc = std::max(acc, output_activation_min); - acc = std::min(acc, output_activation_max); - output_data[out_c + output_depth * b] = static_cast(acc); - } - } -} - -template -inline void FullyConnectedPerChannel( - const FullyConnectedParams& params, const int32_t* output_multiplier, - const int* output_shift, const RuntimeShape& input_shape, - const int16_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, const RuntimeShape& bias_shape, - const AccumScalar* bias_data, const RuntimeShape& output_shape, - int16_t* output_data) { - const int32_t output_activation_min = params.quantized_activation_min; - const int32_t output_activation_max = params.quantized_activation_max; - TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2); - TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1); - - TFLITE_DCHECK_LE(output_activation_min, output_activation_max); - const int filter_dim_count = filter_shape.DimensionsCount(); - const int output_dim_count = output_shape.DimensionsCount(); - const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1); - const int output_depth = output_shape.Dims(output_dim_count - 1); - TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2)); - const int accum_depth = filter_shape.Dims(filter_dim_count - 1); - for (int b = 0; b < batches; ++b) { - for (int out_c = 0; out_c < output_depth; ++out_c) { - AccumScalar acc = 0; - for (int d = 0; d < accum_depth; ++d) { - int32_t input_val = input_data[b * accum_depth + d]; - int32_t filter_val = filter_data[out_c * accum_depth + d]; - acc += filter_val * input_val; - } - if (bias_data) { - acc += bias_data[out_c]; - } int32_t acc_scaled = MultiplyByQuantizedMultiplier( acc, output_multiplier[out_c], output_shift[out_c]); + acc_scaled += output_offset; acc_scaled = std::max(acc_scaled, output_activation_min); acc_scaled = std::min(acc_scaled, output_activation_max); - output_data[out_c + output_depth * b] = static_cast(acc_scaled); + output_data[out_c + output_depth * b] = + static_cast(acc_scaled); } } } -inline void FullyConnected( - const FullyConnectedParams& params, 
const RuntimeShape& input_shape, - const int8_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, const RuntimeShape& bias_shape, - const int32_t* bias_data, const RuntimeShape& output_shape, - int8_t* output_data) { +template +void FullyConnected(const FullyConnectedParams& params, + const RuntimeShape& input_shape, + const InputType* input_data, + const RuntimeShape& filter_shape, + const WeightType* filter_data, + const RuntimeShape& bias_shape, const BiasType* bias_data, + const RuntimeShape& output_shape, OutputType* output_data) { const int32_t input_offset = params.input_offset; const int32_t filter_offset = params.weights_offset; const int32_t output_offset = params.output_offset; @@ -136,7 +100,7 @@ inline void FullyConnected( const int accum_depth = filter_shape.Dims(filter_dim_count - 1); for (int b = 0; b < batches; ++b) { for (int out_c = 0; out_c < output_depth; ++out_c) { - int32_t acc = 0; + BiasType acc = 0; for (int d = 0; d < accum_depth; ++d) { int32_t input_val = input_data[b * accum_depth + d]; int32_t filter_val = filter_data[out_c * accum_depth + d]; @@ -145,67 +109,13 @@ inline void FullyConnected( if (bias_data) { acc += bias_data[out_c]; } - acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); - acc += output_offset; - acc = std::max(acc, output_activation_min); - acc = std::min(acc, output_activation_max); - output_data[out_c + output_depth * b] = static_cast(acc); - } - } -} - -inline void FullyConnectedWithPackedInt4Weights( - const FullyConnectedParams& params, const RuntimeShape& input_shape, - const int8_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, int8_t* unpacked_filter_data, - const RuntimeShape& bias_shape, const int32_t* bias_data, - const RuntimeShape& output_shape, int8_t* output_data) { - TFLITE_DCHECK_NE(unpacked_filter_data, nullptr); - tflite::tensor_utils::UnpackDenseInt4IntoInt8( - filter_data, filter_shape.FlatSize(), unpacked_filter_data); - FullyConnected(params, input_shape, input_data, filter_shape, - unpacked_filter_data, bias_shape, bias_data, output_shape, - output_data); -} - -template -inline void FullyConnected( - const FullyConnectedParams& params, const RuntimeShape& input_shape, - const int16_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, const RuntimeShape& bias_shape, - const AccumScalar* bias_data, const RuntimeShape& output_shape, - int16_t* output_data) { - const int32_t filter_offset = params.weights_offset; - const int32_t output_multiplier = params.output_multiplier; - const int output_shift = params.output_shift; - const int32_t output_activation_min = params.quantized_activation_min; - const int32_t output_activation_max = params.quantized_activation_max; - TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2); - TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1); - - TFLITE_DCHECK_LE(output_activation_min, output_activation_max); - const int filter_dim_count = filter_shape.DimensionsCount(); - const int output_dim_count = output_shape.DimensionsCount(); - const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1); - const int output_depth = output_shape.Dims(output_dim_count - 1); - TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2)); - const int accum_depth = filter_shape.Dims(filter_dim_count - 1); - for (int b = 0; b < batches; ++b) { - for (int out_c = 0; out_c < output_depth; ++out_c) { - AccumScalar acc = 0; - for (int d = 0; d < accum_depth; ++d) { - int32_t input_val = 
input_data[b * accum_depth + d]; - int32_t filter_val = filter_data[out_c * accum_depth + d]; - acc += (filter_val + filter_offset) * input_val; - } - if (bias_data) { - acc += bias_data[out_c]; - } int32_t acc_scaled = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); + acc_scaled += output_offset; acc_scaled = std::max(acc_scaled, output_activation_min); acc_scaled = std::min(acc_scaled, output_activation_max); - output_data[out_c + output_depth * b] = static_cast(acc_scaled); + output_data[out_c + output_depth * b] = + static_cast(acc_scaled); } } } diff --git a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h index 09d37b726..7e3f690e9 100644 --- a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h +++ b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h @@ -1,10 +1,10 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -15,65 +15,4 @@ limitations under the License. #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_ #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_ -#include - -#include "tensorflow/lite/kernels/internal/common.h" - -namespace tflite { -namespace reference_integer_ops { - -template -inline void Mean(const tflite::MeanParams& op_params, int32_t multiplier, - int32_t shift, const RuntimeShape& unextended_input_shape, - const integer_type* input_data, int32_t input_zero_point, - const RuntimeShape& unextended_output_shape, - integer_type* output_data, int32_t output_zero_point) { - // Current implementation only supports dimension equals 4 and simultaneous - // reduction over width and height. 
- TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4); - TFLITE_CHECK_LE(unextended_output_shape.DimensionsCount(), 4); - const RuntimeShape input_shape = - RuntimeShape::ExtendedShape(4, unextended_input_shape); - const RuntimeShape output_shape = - RuntimeShape::ExtendedShape(4, unextended_output_shape); - const int output_batch = output_shape.Dims(0); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - const int output_depth = output_shape.Dims(3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int num_elements_in_axis = input_width * input_height; - - TFLITE_CHECK_EQ(op_params.axis_count, 2); - TFLITE_CHECK((op_params.axis[0] == 1 && op_params.axis[1] == 2) || - (op_params.axis[0] == 2 && op_params.axis[1] == 1)); - TFLITE_CHECK_EQ(output_height, 1); - TFLITE_CHECK_EQ(output_width, 1); - - static constexpr int32_t kMinInt = std::numeric_limits::min(); - static constexpr int32_t kMaxInt = std::numeric_limits::max(); - - for (int out_b = 0; out_b < output_batch; ++out_b) { - for (int out_d = 0; out_d < output_depth; ++out_d) { - int32_t acc = 0; - for (int in_h = 0; in_h < input_height; ++in_h) { - for (int in_w = 0; in_w < input_width; ++in_w) { - acc += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)] - - input_zero_point; - } - } - acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift); - acc = acc > 0 ? (acc + num_elements_in_axis / 2) / num_elements_in_axis - : (acc - num_elements_in_axis / 2) / num_elements_in_axis; - acc += output_zero_point; - acc = std::min(std::max(acc, kMinInt), kMaxInt); - output_data[Offset(output_shape, out_b, 0, 0, out_d)] = - static_cast(acc); - } - } -} - -} // namespace reference_integer_ops -} // namespace tflite - #endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_ diff --git a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h index 22e897409..05066184c 100644 --- a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h +++ b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h @@ -24,10 +24,10 @@ limitations under the License. 
namespace tflite { namespace reference_integer_ops { -template -inline void MulElementwise(int size, const ArithmeticParams& params, - const T* input1_data, const T* input2_data, - T* output_data) { +template +void MulElementwise(int size, const ArithmeticParams& params, + const InputType* input1_data, const InputType* input2_data, + OutputType* output_data) { for (int i = 0; i < size; ++i) { const int32_t input1_val = params.input1_offset + input1_data[i]; const int32_t input2_val = params.input2_offset + input2_data[i]; @@ -39,7 +39,7 @@ inline void MulElementwise(int size, const ArithmeticParams& params, const int32_t clamped_output = std::min(params.quantized_activation_max, std::max(params.quantized_activation_min, unclamped_result)); - output_data[i] = static_cast(clamped_output); + output_data[i] = static_cast(clamped_output); } } diff --git a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/mul.h b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/mul.h index 531977327..2767fef26 100644 --- a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/mul.h +++ b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/mul.h @@ -1,4 +1,4 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -56,7 +56,7 @@ inline void Mul(const ArithmeticParams& params, const int flat_size = MatchingExtendedShapeFlatSize(input1_shape, input2_shape, output_shape); for (int i = 0; i < flat_size; ++i) { - output_data[i] = ActivationFunctionWithMinMax( + output_data[i] = ActivationFunctionWithMinMax( input1_data[i] * input2_data[i], output_activation_min, output_activation_max); } @@ -128,14 +128,18 @@ inline void BroadcastMul4DSlow(const ArithmeticParams& params, } } -template -void BroadcastMul4DSlow(const ArithmeticParams& params, - const RuntimeShape& unextended_input1_shape, - const T* input1_data, - const RuntimeShape& unextended_input2_shape, - const T* input2_data, - const RuntimeShape& unextended_output_shape, - T* output_data) { +template +inline typename std::enable_if< + !is_small_integer::value || enable_for_short_integers, void>::type +BroadcastMul4DSlow(const ArithmeticParams& params, + const RuntimeShape& unextended_input1_shape, + const T* input1_data, + const RuntimeShape& unextended_input2_shape, + const T* input2_data, + const RuntimeShape& unextended_output_shape, + T* output_data) { T output_activation_min; T output_activation_max; GetActivationParams(params, &output_activation_min, &output_activation_max); @@ -167,7 +171,7 @@ void BroadcastMul4DSlow(const ArithmeticParams& params, for (int x = 0; x < output_shape.Dims(2); ++x) { for (int c = 0; c < output_shape.Dims(3); ++c) { output_data[Offset(output_shape, b, y, x, c)] = - ActivationFunctionWithMinMax( + ActivationFunctionWithMinMax( input1_data[SubscriptToIndex(desc1, b, y, x, c)] * input2_data[SubscriptToIndex(desc2, b, y, x, c)], output_activation_min, output_activation_max); diff --git a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/reduce.h b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/reduce.h index adc435f90..5b795ea8f 100644 --- a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/reduce.h +++ b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/reduce.h @@ -268,11 +268,11 @@ 
inline bool Mean(const T* input_data, const int* input_dims, return true; } -template inline void Mean(const tflite::MeanParams& op_params, const RuntimeShape& unextended_input_shape, - const T* input_data, - const RuntimeShape& unextended_output_shape, T* output_data) { + const float* input_data, + const RuntimeShape& unextended_output_shape, + float* output_data) { ruy::profiler::ScopeLabel label("Mean4D"); // Current implementation only supports dimension equals 4 and simultaneous @@ -312,78 +312,21 @@ inline void Mean(const tflite::MeanParams& op_params, } } -inline void Mean(const tflite::MeanParams& op_params, - const RuntimeShape& unextended_input_shape, - const uint8_t* input_data, int32_t input_zero_point, - float input_scale, const RuntimeShape& unextended_output_shape, - uint8_t* output_data, int32_t output_zero_point, - float output_scale) { - ruy::profiler::ScopeLabel label("Mean4D/Uint8"); - - // Current implementation only supports dimension equals 4 and simultaneous - // reduction over width and height. - TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4); - TFLITE_CHECK_LE(unextended_output_shape.DimensionsCount(), 4); - const RuntimeShape input_shape = - RuntimeShape::ExtendedShape(4, unextended_input_shape); - const RuntimeShape output_shape = - RuntimeShape::ExtendedShape(4, unextended_output_shape); - const int output_batch = output_shape.Dims(0); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - const int output_depth = output_shape.Dims(3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const float num_elements_in_axis = input_width * input_height; - - TFLITE_CHECK_EQ(op_params.axis_count, 2); - TFLITE_CHECK((op_params.axis[0] == 1 && op_params.axis[1] == 2) || - (op_params.axis[0] == 2 && op_params.axis[1] == 1)); - TFLITE_CHECK_EQ(output_height, 1); - TFLITE_CHECK_EQ(output_width, 1); - - constexpr int32_t kMinValue = std::numeric_limits::min(); - constexpr int32_t kMaxValue = std::numeric_limits::max(); - - float temp = input_zero_point * input_scale / output_scale; - temp = temp > 0 ? temp + 0.5f : temp - 0.5f; - int32_t bias = output_zero_point - static_cast(temp); - double real_scale = - static_cast(input_scale / (num_elements_in_axis * output_scale)); - - int32_t multiplier; - int shift; - QuantizeMultiplier(real_scale, &multiplier, &shift); - for (int out_b = 0; out_b < output_batch; ++out_b) { - for (int out_d = 0; out_d < output_depth; ++out_d) { - int32_t acc = 0; - for (int in_h = 0; in_h < input_height; ++in_h) { - for (int in_w = 0; in_w < input_width; ++in_w) { - acc += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)]; - } - } - acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift); - acc += bias; - acc = std::min(std::max(acc, kMinValue), kMaxValue); - output_data[Offset(output_shape, out_b, 0, 0, out_d)] = - static_cast(acc); - } - } -} - // Computes the mean of elements across dimensions given in axis. // It does so in two stages, first calculates the sum of elements along the axis // then divides it by the number of element in axis for quantized values. 
template inline bool QuantizedMeanOrSum(const T* input_data, int32_t input_zero_point, - float input_scale, const int* input_dims, - const int input_num_dims, T* output_data, - int32_t output_zero_point, float output_scale, + const int* input_dims, const int input_num_dims, + T* output_data, int32_t output_multiplier, + int output_shift, int32_t output_zero_point, const int* output_dims, const int output_num_dims, const int* axis, const int num_axis_dimensions, bool keep_dims, int* temp_index, int* resolved_axis, U* temp_sum, bool compute_sum) { + const int32_t kMinValue = std::numeric_limits::min(); + const int32_t kMaxValue = std::numeric_limits::max(); const bool uint8_case = std::is_same::value; const bool int16_case = std::is_same::value; if (uint8_case) { @@ -430,40 +373,46 @@ inline bool QuantizedMeanOrSum(const T* input_data, int32_t input_zero_point, } // Calculate mean by dividing output_data by num of aggregated element. - size_t num_elements_in_axis = 1; + int64_t num_elements_in_axis = 1; for (int idx = 0; idx < num_resolved_axis; ++idx) { size_t current = static_cast(input_dims[resolved_axis[idx]]); // Overflow prevention. - if (current > (std::numeric_limits::max() / num_elements_in_axis)) { + if (current > static_cast(std::numeric_limits::max() / + num_elements_in_axis)) { return false; } num_elements_in_axis *= current; } - if (num_elements_in_axis > 0) { - const float scale = input_scale / output_scale; - if (compute_sum) { - // TODO(b/116341117): Eliminate float and do this completely in 8bit. - const float bias = -input_zero_point * scale * num_elements_in_axis; - for (size_t idx = 0; idx < num_outputs; ++idx) { - const U value = - static_cast(TfLiteRound(temp_sum[idx] * scale + bias)) + - output_zero_point; - output_data[idx] = static_cast(value); - } - } else { - const float bias = -input_zero_point * scale; - for (size_t idx = 0; idx < num_outputs; ++idx) { - float float_mean = static_cast(temp_sum[idx]) / - static_cast(num_elements_in_axis); - float result = TfLiteMin( - TfLiteRound(float_mean * scale + bias) + output_zero_point, - static_cast(std::numeric_limits::max())); - result = TfLiteMax(result, - static_cast(std::numeric_limits::min())); - output_data[idx] = static_cast(result); - } - } + if (num_elements_in_axis == 0) { + return true; + } + + // Readapt output rescaling when calculating the mean to integrate a + // 1/num_elements_in_axis multiplier. + if (!compute_sum) { + TFLITE_DCHECK_GE(num_elements_in_axis, 0); + int shift = + 63 - CountLeadingZeros(static_cast(num_elements_in_axis)); + // To avoid any overflow risk 'shift' should be <= 32 and to satisfy + // 'MultiplyByQuantizedMultiplier' pre-conditions 'output_shift - shift' + // should be >= -31. Clamp the value at the price of some precision loss. 
+ shift = std::min(shift, 32); + shift = std::min(shift, 31 + output_shift); + output_multiplier = static_cast( + (static_cast(output_multiplier) << shift) / + num_elements_in_axis); + output_shift = output_shift - shift; + } + + for (size_t idx = 0; idx < num_outputs; ++idx) { + const U shifted_sum = + static_cast(temp_sum[idx] - input_zero_point * num_elements_in_axis); + int32_t output = MultiplyByQuantizedMultiplier( + shifted_sum, output_multiplier, output_shift) + + output_zero_point; + output = std::min(std::max(output, kMinValue), kMaxValue); + output_data[idx] = static_cast(output); } return true; } @@ -478,8 +427,8 @@ inline bool QuantizedMeanOrSumExtraArgs( bool keep_dims, int* temp_index, int* resolved_axis, U* temp_sum, bool compute_sum) { return QuantizedMeanOrSum( - input_data, input_zero_point, input_scale, input_dims, input_num_dims, - output_data, output_zero_point, output_scale, output_dims, + input_data, input_zero_point, input_dims, input_num_dims, output_data, + output_multiplier, output_shift, output_zero_point, output_dims, output_num_dims, axis, num_axis_dimensions, keep_dims, temp_index, resolved_axis, temp_sum, compute_sum); } diff --git a/third_party/tflite-micro/tensorflow/lite/kernels/internal/runtime_shape.h b/third_party/tflite-micro/tensorflow/lite/kernels/internal/runtime_shape.h index c2678b57d..0e4df2c36 100644 --- a/third_party/tflite-micro/tensorflow/lite/kernels/internal/runtime_shape.h +++ b/third_party/tflite-micro/tensorflow/lite/kernels/internal/runtime_shape.h @@ -15,6 +15,8 @@ limitations under the License. #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_RUNTIME_SHAPE_H_ #define TENSORFLOW_LITE_KERNELS_INTERNAL_RUNTIME_SHAPE_H_ +#include "tensorflow/lite/kernels/internal/compatibility.h" + namespace tflite { template @@ -27,16 +29,19 @@ class RuntimeShape { public: RuntimeShape& operator=(RuntimeShape const&) = delete; - // RuntimeShape in TFLM supports up to 5 dimensions. + // RuntimeShape in TFLM supports up to 6 dimensions. // The name kMaxSmallSize comes from the same file of the upstream // tensorflow lite repo and need to be kept the same for max reuse. 
- static constexpr int kMaxSmallSize = 5; + static constexpr int kMaxSmallSize = 6; RuntimeShape() : size_(0) {} - explicit RuntimeShape(int dimensions_count) : size_(dimensions_count) {} + explicit RuntimeShape(int dimensions_count) : size_(dimensions_count) { + TFLITE_DCHECK_LE(dimensions_count, kMaxSmallSize); + } RuntimeShape(int shape_size, int32_t value) : size_(shape_size) { + TFLITE_DCHECK_LE(shape_size, kMaxSmallSize); for (int i = 0; i < shape_size; ++i) { SetDim(i, value); } @@ -44,6 +49,7 @@ class RuntimeShape { RuntimeShape(int dimensions_count, const int32_t* dims_data) : size_(dimensions_count) { + // check of dimensions_count handled by ReplaceWith() ReplaceWith(dimensions_count, dims_data); } @@ -69,6 +75,7 @@ class RuntimeShape { static RuntimeShape ExtendedShape(int new_shape_size, const RuntimeShape& shape) { + TFLITE_DCHECK_LE(new_shape_size, kMaxSmallSize); return RuntimeShape(new_shape_size, shape, 1); } int32_t* DimsData() { return dims_; } @@ -76,6 +83,7 @@ class RuntimeShape { const int32_t* DimsDataUpTo5D() const { return dims_; } void ReplaceWith(int dimensions_count, const int32_t* dims_data) { + TFLITE_DCHECK_LE(dimensions_count, kMaxSmallSize); size_ = dimensions_count; int32_t* dst_dims = DimsData(); std::memcpy(dst_dims, dims_data, dimensions_count * sizeof(int32_t)); diff --git a/third_party/tflite-micro/tensorflow/lite/kernels/internal/types.h b/third_party/tflite-micro/tensorflow/lite/kernels/internal/types.h index 77f741bbb..043a85136 100644 --- a/third_party/tflite-micro/tensorflow/lite/kernels/internal/types.h +++ b/third_party/tflite-micro/tensorflow/lite/kernels/internal/types.h @@ -1,4 +1,4 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -659,6 +659,9 @@ struct ArithmeticParams { // int64_t activation params. int64_t int64_activation_min; int64_t int64_activation_max; + // int16_t activation params. + int16_t int16_activation_min; + int16_t int16_activation_max; // Processed output dimensions. // Let input "a" be the one that broadcasts in the faster-changing dimension. 
@@ -1022,6 +1025,18 @@ inline void SetActivationParams(int32_t min, int32_t max, P* params) { params->quantized_activation_max = max; } +template +inline void SetActivationParams(uint32_t min, uint32_t max, P* params) { + params->quantized_activation_min = min; + params->quantized_activation_max = max; +} + +template +inline void SetActivationParams(int16_t min, int16_t max, P* params) { + params->int16_activation_min = min; + params->int16_activation_max = max; +} + template inline void SetActivationParams(int64_t min, int64_t max, P* params) { params->int64_activation_min = min; @@ -1034,6 +1049,18 @@ inline void GetActivationParams(const P& params, int32_t* min, int32_t* max) { *max = params.quantized_activation_max; } +template +inline void GetActivationParams(const P& params, uint32_t* min, uint32_t* max) { + *min = params.quantized_activation_min; + *max = params.quantized_activation_max; +} + +template +inline void GetActivationParams(const P& params, int16_t* min, int16_t* max) { + *min = params.int16_activation_min; + *max = params.int16_activation_max; +} + template inline void GetActivationParams(const P& params, float* min, float* max) { *min = params.float_activation_min; diff --git a/third_party/tflite-micro/tensorflow/lite/kernels/kernel_util.cc b/third_party/tflite-micro/tensorflow/lite/kernels/kernel_util.cc index 7b7e37362..4fb035e26 100644 --- a/third_party/tflite-micro/tensorflow/lite/kernels/kernel_util.cc +++ b/third_party/tflite-micro/tensorflow/lite/kernels/kernel_util.cc @@ -436,7 +436,7 @@ TfLiteStatus GetOutputShapeFromInput(TfLiteContext* context, // that build. What appears to be happening is that while the linker drops the // unsused function, the string library that gets pulled in is not dropped, // resulting in the increased binary size. -const std::string GetShapeDebugString(const TfLiteIntArray* shape) { +std::string GetShapeDebugString(const TfLiteIntArray* shape) { std::string str; for (int d = 0; d < shape->size; ++d) { if (str.empty()) diff --git a/third_party/tflite-micro/tensorflow/lite/kernels/kernel_util.h b/third_party/tflite-micro/tensorflow/lite/kernels/kernel_util.h index fc586b1e0..608db54ae 100644 --- a/third_party/tflite-micro/tensorflow/lite/kernels/kernel_util.h +++ b/third_party/tflite-micro/tensorflow/lite/kernels/kernel_util.h @@ -297,7 +297,7 @@ TfLiteStatus GetOutputShapeFromInput(TfLiteContext* context, const TfLiteTensor* input, TfLiteIntArray** output_shape); -const std::string GetShapeDebugString(const TfLiteIntArray* shape); +std::string GetShapeDebugString(const TfLiteIntArray* shape); #endif // !defined(TF_LITE_STATIC_MEMORY) diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/BUILD b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/BUILD index e2ed47685..9f3c8710b 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/BUILD +++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/BUILD @@ -1,45 +1,34 @@ # Description: # TensorFlow Lite for Microcontrollers "hello world" example. 
+load("@tflm_pip_deps//:requirements.bzl", "requirement") load( "//tensorflow/lite/micro:build_def.bzl", - "generate_cc_arrays", "micro_copts", ) package( - default_visibility = ["//visibility:public"], # Disabling layering_check because of http://b/177257332 features = ["-layering_check"], licenses = ["notice"], ) -generate_cc_arrays( - name = "generated_hello_world_model_cc", - src = "hello_world.tflite", - out = "hello_world_model_data.cc", -) - -generate_cc_arrays( - name = "generated_hello_world_model_hdr", - src = "hello_world.tflite", - out = "hello_world_model_data.h", -) - cc_library( name = "model", srcs = [ - ":generated_hello_world_model_cc", + "//tensorflow/lite/micro/examples/hello_world/models:generated_hello_world_float_model_cc", + "//tensorflow/lite/micro/examples/hello_world/models:generated_hello_world_int8_model_cc", ], hdrs = [ - ":generated_hello_world_model_hdr", + "//tensorflow/lite/micro/examples/hello_world/models:generated_hello_world_float_model_hdr", + "//tensorflow/lite/micro/examples/hello_world/models:generated_hello_world_int8_model_hdr", ], copts = micro_copts(), ) cc_test( - name = "hello_world_test", + name = "evaluate_cc_test", srcs = [ - "hello_world_test.cc", + "evaluate_test.cc", ], deps = [ ":model", @@ -51,57 +40,41 @@ cc_test( ], ) -cc_library( - name = "output_handler", - srcs = [ - "output_handler.cc", - ], - hdrs = [ - "output_handler.h", - ], - copts = micro_copts(), +py_binary( + name = "evaluate", + srcs = ["evaluate.py"], + data = ["//tensorflow/lite/micro/examples/hello_world/models:hello_world_float.tflite"], + python_version = "PY3", + srcs_version = "PY3", deps = [ - "//tensorflow/lite/c:common", - "//tensorflow/lite/micro:micro_log", + "@absl_py//absl:app", + "@absl_py//absl/flags", + "@absl_py//absl/logging", + requirement("numpy"), + requirement("tensorflow-cpu"), + "//tensorflow/lite/micro/python/interpreter/src:tflm_runtime", ], ) -cc_library( - name = "constants", - srcs = [ - "constants.cc", +py_binary( + name = "evaluate_test", + srcs = ["evaluate_test.py"], + data = [ + "//tensorflow/lite/micro/examples/hello_world/models:hello_world_float.tflite", ], - hdrs = [ - "constants.h", + python_version = "PY3", + srcs_version = "PY3", + deps = [ + ":evaluate", ], - copts = micro_copts(), ) -cc_binary( - name = "hello_world", - srcs = [ - "main.cc", - "main_functions.cc", - "main_functions.h", - ], - copts = [ - "-Werror", - "-Wsign-compare", - ], +py_binary( + name = "train", + srcs = ["train.py"], + srcs_version = "PY3", deps = [ - ":constants", - ":model", - ":output_handler", - "//tensorflow/lite/micro:micro_framework", - "//tensorflow/lite/micro:micro_log", - "//tensorflow/lite/micro:op_resolvers", - "//tensorflow/lite/micro:system_setup", - "//tensorflow/lite/schema:schema_fbs", + requirement("numpy"), + requirement("tensorflow-cpu"), ], ) - -sh_test( - name = "hello_world_binary_test", - srcs = ["hello_world_binary_test.sh"], - data = [":hello_world"], -) diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/Makefile.inc b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/Makefile.inc index ad058e819..2b9c57a4e 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/Makefile.inc +++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/Makefile.inc @@ -1,57 +1,37 @@ -EXAMPLE_NAME:=hello_world - HELLO_WORLD_TEST_SRCS := \ -$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/$(EXAMPLE_NAME)/$(EXAMPLE_NAME)_test.cc - -OUTPUT_HANDLER_TEST_SRCS := \ 
-$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/$(EXAMPLE_NAME)/output_handler_test.cc \ -$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/$(EXAMPLE_NAME)/output_handler.cc - -OUTPUT_HANDLER_TEST_HDRS := \ -$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/$(EXAMPLE_NAME)/output_handler.h \ -$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/$(EXAMPLE_NAME)/constants.h +$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/hello_world/evaluate_test.cc HELLO_WORLD_SRCS := \ -$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/$(EXAMPLE_NAME)/main.cc \ -$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/$(EXAMPLE_NAME)/main_functions.cc \ -$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/$(EXAMPLE_NAME)/output_handler.cc \ -$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/$(EXAMPLE_NAME)/constants.cc +$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/hello_world/evaluate_test.cc -HELLO_WORLD_HDRS := \ -$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/$(EXAMPLE_NAME)/output_handler.h \ -$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/$(EXAMPLE_NAME)/constants.h \ -$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/$(EXAMPLE_NAME)/main_functions.h +HELLO_WORLD_HDRS := HELLO_WORLD_GENERATOR_INPUTS := \ -$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/hello_world/hello_world.tflite +$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/hello_world/models/hello_world_float.tflite \ +$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/hello_world/models/hello_world_int8.tflite HELLO_WORLD_GENERATED_SRCS := \ -$(GENERATED_SRCS_DIR)$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/hello_world/hello_world_model_data.cc +$(GENERATED_SRCS_DIR)$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/hello_world/models/hello_world_float_model_data.cc \ +$(GENERATED_SRCS_DIR)$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/hello_world/models/hello_world_int8_model_data.cc HELLO_WORLD_GENERATED_HDRS := \ -$(GENERATED_SRCS_DIR)$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/hello_world/hello_world_model_data.h - -#Find any platform - specific rules for this example. -include $(wildcard $(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/$(EXAMPLE_NAME)/*/Makefile.inc) +$(GENERATED_SRCS_DIR)$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/hello_world/models/hello_world_float_model_data.h \ +$(GENERATED_SRCS_DIR)$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/hello_world/models/hello_world_int8_model_data.h # Tests loading and running the sine model. -$(eval $(call microlite_test,$(EXAMPLE_NAME)_test,\ +$(eval $(call microlite_test,evaluate_cc_test,\ $(HELLO_WORLD_TEST_SRCS),,$(HELLO_WORLD_GENERATOR_INPUTS))) -# Tests producing an output. -$(eval $(call microlite_test,output_handler_test,\ -$(OUTPUT_HANDLER_TEST_SRCS),$(OUTPUT_HANDLER_TEST_HDRS))) - # Builds a standalone binary. -$(eval $(call microlite_test,$(EXAMPLE_NAME),\ -$(HELLO_WORLD_SRCS),$(HELLO_WORLD_HDRS),$(HELLO_WORLD_GENERATOR_INPUTS))) +$(eval $(call microlite_test,hello_world,\ +$(HELLO_WORLD_SRCS),,$(HELLO_WORLD_GENERATOR_INPUTS))) # Add sources and headers generated from $(HELLO_WORLD_GENERATOR_INPUTS). 
HELLO_WORLD_SRCS += $(HELLO_WORLD_GENERATED_SRCS) HELLO_WORLD_HDRS += $(HELLO_WORLD_GENERATED_HDRS) -list_$(EXAMPLE_NAME)_example_sources: +list_hello_world_example_sources: @echo $(HELLO_WORLD_SRCS) -list_$(EXAMPLE_NAME)_example_headers: +list_hello_world_example_headers: @echo $(HELLO_WORLD_HDRS) diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/README.md b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/README.md index b5bb00ff2..6740bb412 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/README.md +++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/README.md @@ -10,17 +10,44 @@ microcontroller. ## Table of contents +- [Run the evaluate.py script on a development machine](#run-the-evaluate-script-on-a-development-machine) - [Run the tests on a development machine](#run-the-tests-on-a-development-machine) - [Train your own model](#train-your-own-model) +## Run the evaluate.py script on a development machine +The evaluate.py script runs the hello_world.tflite model with x_values in the +range of [0, 2*PI]. The script plots the sine-wave values predicted by the TFLM +interpreter and compares those predictions with the actual values computed +by NumPy. +```bash +bazel build :evaluate +bazel run :evaluate +bazel run :evaluate -- --use_tflite +``` +![TFLM hello_world sine wave prediction vs. actual values](images/hello_world_tflm.png) ![TFLM hello_world sine wave prediction vs. actual values](images/hello_world_tflite.png) + +## Run the evaluate_test.py script on a development machine +These tests verify the input/output handling as well as the predictions of the +hello_world.tflite model. One test also verifies the correctness of +the model by running both the TFLM and TFLite interpreters and comparing +their predictions. +```bash +bazel build :evaluate_test +bazel run :evaluate_test +``` + ## Run the tests on a development machine +Run the C++ test using Bazel: ```bash -make -f tensorflow/lite/micro/tools/make/Makefile third_party_downloads -make -f tensorflow/lite/micro/tools/make/Makefile test_hello_world_test +bazel run tensorflow/lite/micro/examples/hello_world:evaluate_cc_test +``` +And to run it using Make: +```bash +make -f tensorflow/lite/micro/tools/make/Makefile test_evaluate_cc_test ``` -The source for the test is [hello_world_test.cc](hello_world_test.cc). +The source for the test is [evaluate_test.cc](evaluate_test.cc). It's a fairly small amount of code that creates an interpreter, gets a handle to a model that's been compiled into the program, and then invokes the interpreter with the model and sample inputs. @@ -28,6 +55,40 @@ with the model and sample inputs. ## Train your own model So far you have used an existing trained model to run inference on -microcontrollers. If you wish to train your own model, follow the instructions -given in the [train/](train/) directory. +microcontrollers. If you wish to train your own model, the following scripts +will help you achieve that. + +```bash +bazel build tensorflow/lite/micro/examples/hello_world:train +``` +And to run it: +```bash +bazel-bin/tensorflow/lite/micro/examples/hello_world/train --save_tf_model +--save_dir=/tmp/model_created/ +``` +The above script will create a TF model and a TFLite model inside the +`/tmp/model_created` directory. + +Now the above model is a `float` model, which means it takes floating-point input +and produces floating-point output.
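For a quick sanity check of the trained float model from Python, a minimal sketch along the lines of `evaluate.py` can be used; the path below assumes the model was saved to `/tmp/model_created/` as described above.

```python
import numpy as np
from tflite_micro.tensorflow.lite.micro.python.interpreter.src import tflm_runtime

# Load the float model with the TFLM Python interpreter (path is illustrative).
interpreter = tflm_runtime.Interpreter.from_file(
    '/tmp/model_created/hello_world_float.tflite')

# The model maps a scalar x in [0, 2*pi] to an approximation of sin(x);
# the input tensor has shape (1, 1).
x_value = np.array([[1.0]], dtype=np.float32)
interpreter.set_input(x_value, 0)
interpreter.invoke()
y_pred = np.reshape(interpreter.get_output(0), -1)[0]

print(y_pred, np.sin(1.0))  # the two values should be close
```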
+ +If you want a fully quantized model, you can use the `ptq.py` script inside the +quantization directory. The `ptq.py` script takes a floating-point TF model +and produces a quantized model. + +Build the `ptq.py` script: +```bash +bazel build tensorflow/lite/micro/examples/hello_world/quantization:ptq +``` + +Then run the `ptq` script to convert the float model to a quantized model as +follows. Note that the directory of the TF model (`/tmp/model_created`) is used +as the source_model_dir here. The script converts the TF model found in that +folder to an `int8` TFLite model, named `hello_world_int8.tflite`, which is +created inside the target_dir. +```bash +bazel-bin/tensorflow/lite/micro/examples/hello_world/quantization/ptq +--source_model_dir=/tmp/model_created --target_dir=/tmp/quant_model/ +``` diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/evaluate.py b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/evaluate.py new file mode 100644 index 000000000..246091ef0 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/evaluate.py @@ -0,0 +1,131 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import tensorflow as tf +from absl import app +from absl import flags +import numpy as np +import matplotlib.pyplot as plt +from tensorflow.python.platform import resource_loader +from tflite_micro.tensorflow.lite.micro.python.interpreter.src import tflm_runtime + +_USE_TFLITE_INTERPRETER = flags.DEFINE_bool( + 'use_tflite', + False, + 'Inference with the TF Lite interpreter instead of the TFLM interpreter', +) + +_PREFIX_PATH = resource_loader.get_path_to_datafile('') + + +def invoke_tflm_interpreter(input_shape, interpreter, x_value, input_index, + output_index): + input_data = np.reshape(x_value, input_shape) + interpreter.set_input(input_data, input_index) + interpreter.invoke() + y_quantized = np.reshape(interpreter.get_output(output_index), -1)[0] + return y_quantized + + +def invoke_tflite_interpreter(input_shape, interpreter, x_value, input_index, + output_index): + input_data = np.reshape(x_value, input_shape) + interpreter.set_tensor(input_index, input_data) + interpreter.invoke() + tflite_output = interpreter.get_tensor(output_index) + y_quantized = np.reshape(tflite_output, -1)[0] + return y_quantized + + +# Generate a list of 1000 random floats in the range of 0 to 2*pi.
+def generate_random_input(sample_count=1000): + # Generate a uniformly distributed set of random numbers in the range from + # 0 to 2Ï€, which covers a complete sine wave oscillation + x_values = np.random.uniform(low=0, high=2 * np.pi, + size=sample_count).astype(np.float32) + # Shuffle the values to guarantee they're not in order + np.random.shuffle(x_values) + return x_values + + +# Invoke the tflm interpreter with x_values in the range of [0, 2*PI] and +# returns the prediction of the interpreter. +def get_tflm_prediction(model_path, x_values): + # Create the tflm interpreter + tflm_interpreter = tflm_runtime.Interpreter.from_file(model_path) + + input_shape = np.array(tflm_interpreter.get_input_details(0).get('shape')) + + y_predictions = np.empty(x_values.size, dtype=np.float32) + + for i, x_value in enumerate(x_values): + y_predictions[i] = invoke_tflm_interpreter(input_shape, + tflm_interpreter, + x_value, + input_index=0, + output_index=0) + return y_predictions + + +# Invoke the tflite interpreter with x_values in the range of [0, 2*PI] and +# returns the prediction of the interpreter. +def get_tflite_prediction(model_path, x_values): + # TFLite interpreter + tflite_interpreter = tf.lite.Interpreter( + model_path=model_path, + experimental_op_resolver_type=tf.lite.experimental.OpResolverType. + BUILTIN_REF, + ) + tflite_interpreter.allocate_tensors() + + input_details = tflite_interpreter.get_input_details()[0] + output_details = tflite_interpreter.get_output_details()[0] + input_shape = np.array(input_details.get('shape')) + + y_predictions = np.empty(x_values.size, dtype=np.float32) + + for i, x_value in enumerate(x_values): + y_predictions[i] = invoke_tflite_interpreter( + input_shape, + tflite_interpreter, + x_value, + input_details['index'], + output_details['index'], + ) + return y_predictions + + +def main(_): + model_path = os.path.join(_PREFIX_PATH, 'models/hello_world_float.tflite') + + x_values = generate_random_input() + + # Calculate the corresponding sine values + y_true_values = np.sin(x_values).astype(np.float32) + + if _USE_TFLITE_INTERPRETER.value: + y_predictions = get_tflite_prediction(model_path, x_values) + plt.plot(x_values, y_predictions, 'b.', label='TFLite Prediction') + else: + y_predictions = get_tflm_prediction(model_path, x_values) + plt.plot(x_values, y_predictions, 'b.', label='TFLM Prediction') + + plt.plot(x_values, y_true_values, 'r.', label='Actual values') + plt.legend() + plt.show() + + +if __name__ == '__main__': + app.run(main) diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/evaluate_test.cc b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/evaluate_test.cc new file mode 100644 index 000000000..0b5d0663a --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/evaluate_test.cc @@ -0,0 +1,176 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include + +#include "tensorflow/lite/core/c/common.h" +#include "tensorflow/lite/micro/examples/hello_world/models/hello_world_float_model_data.h" +#include "tensorflow/lite/micro/examples/hello_world/models/hello_world_int8_model_data.h" +#include "tensorflow/lite/micro/micro_interpreter.h" +#include "tensorflow/lite/micro/micro_log.h" +#include "tensorflow/lite/micro/micro_mutable_op_resolver.h" +#include "tensorflow/lite/micro/system_setup.h" +#include "tensorflow/lite/schema/schema_generated.h" + +namespace { +using HelloWorldOpResolver = tflite::MicroMutableOpResolver<1>; + +TfLiteStatus RegisterOps(HelloWorldOpResolver& op_resolver) { + TF_LITE_ENSURE_STATUS(op_resolver.AddFullyConnected()); + return kTfLiteOk; +} +} // namespace + +TfLiteStatus LoadFloatModelAndPerformInference() { + // Map the model into a usable data structure. This doesn't involve any + // copying or parsing, it's a very lightweight operation. + const tflite::Model* model = + ::tflite::GetModel(g_hello_world_float_model_data); + if (model->version() != TFLITE_SCHEMA_VERSION) { + MicroPrintf( + "Model provided is schema version %d not equal " + "to supported version %d.\n", + model->version(), TFLITE_SCHEMA_VERSION); + } + + HelloWorldOpResolver op_resolver; + TF_LITE_ENSURE_STATUS(RegisterOps(op_resolver)); + + // Arena size just a round number. The exact arena usage can be determined + // using the RecordingMicroInterpreter. + constexpr int kTensorArenaSize = 3000; + uint8_t tensor_arena[kTensorArenaSize]; + + // Build an interpreter to run the model with + tflite::MicroInterpreter interpreter(model, op_resolver, tensor_arena, + kTensorArenaSize); + + // Allocate memory from the tensor_arena for the model's tensors + if (interpreter.AllocateTensors() != kTfLiteOk) { + MicroPrintf("Allocate tensor failed."); + return kTfLiteError; + } + + // Obtain a pointer to the model's input tensor + TfLiteTensor* input = interpreter.input(0); + + // Make sure the input has the properties we expect + if (input == nullptr) { + MicroPrintf("Input tensor is null."); + return kTfLiteError; + } + + // Obtain a pointer to the output tensor. + TfLiteTensor* output = interpreter.output(0); + + // Check if the output is within a small range of the expected output + float epsilon = 0.05f; + + constexpr int kNumTestValues = 4; + float golden_inputs[kNumTestValues] = {0.f, 1.f, 3.f, 5.f}; + + for (int i = 0; i < kNumTestValues; ++i) { + input->data.f[0] = golden_inputs[i]; + interpreter.Invoke(); + float y_pred = output->data.f[0]; + if (abs(sin(golden_inputs[i]) - y_pred) > epsilon) { + MicroPrintf( + "Difference between predicted and actual y value " + "is significant."); + return kTfLiteError; + } + } + + return kTfLiteOk; +} + +TfLiteStatus LoadQuantModelAndPerformInference() { + // Map the model into a usable data structure. This doesn't involve any + // copying or parsing, it's a very lightweight operation. + const tflite::Model* model = + ::tflite::GetModel(g_hello_world_int8_model_data); + if (model->version() != TFLITE_SCHEMA_VERSION) { + MicroPrintf( + "Model provided is schema version %d not equal " + "to supported version %d.\n", + model->version(), TFLITE_SCHEMA_VERSION); + } + + HelloWorldOpResolver op_resolver; + TF_LITE_ENSURE_STATUS(RegisterOps(op_resolver)); + + // Arena size just a round number. The exact arena usage can be determined + // using the RecordingMicroInterpreter. 
+ constexpr int kTensorArenaSize = 2056; + uint8_t tensor_arena[kTensorArenaSize]; + + // Build an interpreter to run the model with + tflite::MicroInterpreter interpreter(model, op_resolver, tensor_arena, + kTensorArenaSize); + + // Allocate memory from the tensor_arena for the model's tensors + if (interpreter.AllocateTensors() != kTfLiteOk) { + MicroPrintf("Allocate tensor failed."); + return kTfLiteError; + } + + // Obtain a pointer to the model's input tensor + TfLiteTensor* input = interpreter.input(0); + + // Make sure the input has the properties we expect + if (input == nullptr) { + MicroPrintf("Input tensor is null."); + return kTfLiteError; + } + + // Get the input quantization parameters + float input_scale = input->params.scale; + int input_zero_point = input->params.zero_point; + + // Obtain a pointer to the output tensor. + TfLiteTensor* output = interpreter.output(0); + + // Get the output quantization parameters + float output_scale = output->params.scale; + int output_zero_point = output->params.zero_point; + + // Check if the output is within a small range of the expected output + float epsilon = 0.05f; + + constexpr int kNumTestValues = 4; + float golden_inputs[kNumTestValues] = {0.f, 1.f, 3.f, 5.f}; + + for (int i = 0; i < kNumTestValues; ++i) { + input->data.int8[0] = golden_inputs[i] / input_scale + input_zero_point; + interpreter.Invoke(); + float y_pred = (output->data.int8[0] - output_zero_point) * output_scale; + if (abs(sin(golden_inputs[i]) - y_pred) > epsilon) { + MicroPrintf( + "Difference between predicted and actual y value " + "is significant."); + return kTfLiteError; + } + } + + return kTfLiteOk; +} + +int main(int argc, char* argv[]) { + tflite::InitializeTarget(); + TF_LITE_ENSURE_STATUS(LoadFloatModelAndPerformInference()); + TF_LITE_ENSURE_STATUS(LoadQuantModelAndPerformInference()); + MicroPrintf("~~~ALL TESTS PASSED~~~\n"); + return kTfLiteOk; +} diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/evaluate_test.py b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/evaluate_test.py new file mode 100644 index 000000000..224ac725e --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/evaluate_test.py @@ -0,0 +1,103 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import numpy as np + +from tensorflow.python.framework import test_util +from tensorflow.python.platform import resource_loader +from tensorflow.python.platform import test +from tflite_micro.tensorflow.lite.micro.python.interpreter.src import tflm_runtime +from tflite_micro.tensorflow.lite.micro.examples.hello_world import evaluate + +PREFIX_PATH = resource_loader.get_path_to_datafile('') + + +class HelloWorldQuantModelTest(test_util.TensorFlowTestCase): + model_path = os.path.join(PREFIX_PATH, 'models/hello_world_float.tflite') + input_shape = (1, 1) + output_shape = (1, 1) + # Create the tflm interpreter + tflm_interpreter = tflm_runtime.Interpreter.from_file(model_path) + + # Get the metadata like scales and zero_points from the interpreter input/output + # details. + def get_quantization_params(self, interpreter_io_details): + quantize_params = interpreter_io_details.get('quantization_parameters') + scale = quantize_params.get('scales') + zero_point = quantize_params.get('zero_points') + return scale, zero_point + + def test_input(self): + input_details = self.tflm_interpreter.get_input_details(0) + input_scale, input_zero_point = self.get_quantization_params(input_details) + + self.assertAllEqual(input_details['shape'], self.input_shape) + self.assertEqual(input_details['dtype'], np.float32) + self.assertEqual(len(input_scale), 0) + self.assertEqual( + input_details['quantization_parameters']['quantized_dimension'], 0) + self.assertEqual(input_scale.dtype, np.float32) + self.assertEqual(input_zero_point.dtype, np.int32) + + def test_output(self): + output_details = self.tflm_interpreter.get_output_details(0) + output_scale, output_zero_point = self.get_quantization_params( + output_details) + self.assertAllEqual(output_details['shape'], self.output_shape) + self.assertEqual(output_details['dtype'], np.float32) + self.assertEqual(len(output_scale), 0) + self.assertEqual( + output_details['quantization_parameters']['quantized_dimension'], 0) + self.assertEqual(output_scale.dtype, np.float32) + self.assertEqual(output_zero_point.dtype, np.int32) + + def test_interpreter_prediction(self): + x_value = np.float32(0.0) + # Calculate the corresponding sine values + y_true = np.sin(x_value).astype(np.float32) + + input_shape = np.array( + self.tflm_interpreter.get_input_details(0).get('shape')) + + y_pred = evaluate.invoke_tflm_interpreter( + input_shape, + self.tflm_interpreter, + x_value, + input_index=0, + output_index=0, + ) + + epsilon = 0.05 + self.assertNear( + y_true, + y_pred, + epsilon, + 'hello_world model prediction is not close enough to numpy.sin value', + ) + + def test_compare_with_tflite(self): + x_values = evaluate.generate_random_input() + + tflm_y_predictions = evaluate.get_tflm_prediction(self.model_path, + x_values) + + tflite_y_predictions = evaluate.get_tflite_prediction( + self.model_path, x_values) + + self.assertAllEqual(tflm_y_predictions, tflite_y_predictions) + + +if __name__ == '__main__': + test.main() diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/images/hello_world_tflite.png b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/images/hello_world_tflite.png new file mode 100644 index 000000000..56b222169 Binary files /dev/null and b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/images/hello_world_tflite.png differ diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/images/hello_world_tflm.png 
b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/images/hello_world_tflm.png new file mode 100644 index 000000000..a89fa0866 Binary files /dev/null and b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/images/hello_world_tflm.png differ diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/models/BUILD b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/models/BUILD new file mode 100644 index 000000000..4f025b036 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/models/BUILD @@ -0,0 +1,37 @@ +load("//tensorflow/lite/micro:build_def.bzl", "generate_cc_arrays") + +package( + default_visibility = ["//visibility:public"], +) + +exports_files( + srcs = [ + "hello_world_float.tflite", + "hello_world_int8.tflite", + ], + visibility = ["//tensorflow/lite/micro/examples/hello_world:__subpackages__"], +) + +generate_cc_arrays( + name = "generated_hello_world_float_model_cc", + src = "hello_world_float.tflite", + out = "hello_world_float_model_data.cc", +) + +generate_cc_arrays( + name = "generated_hello_world_float_model_hdr", + src = "hello_world_float.tflite", + out = "hello_world_float_model_data.h", +) + +generate_cc_arrays( + name = "generated_hello_world_int8_model_cc", + src = "hello_world_int8.tflite", + out = "hello_world_int8_model_data.cc", +) + +generate_cc_arrays( + name = "generated_hello_world_int8_model_hdr", + src = "hello_world_int8.tflite", + out = "hello_world_int8_model_data.h", +) diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/models/hello_world_float.tflite b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/models/hello_world_float.tflite new file mode 100644 index 000000000..f741b3a7b Binary files /dev/null and b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/models/hello_world_float.tflite differ diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/models/hello_world_int8.tflite b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/models/hello_world_int8.tflite new file mode 100644 index 000000000..9a379ea9d Binary files /dev/null and b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/models/hello_world_int8.tflite differ diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/quantization/BUILD b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/quantization/BUILD new file mode 100644 index 000000000..1df5f87ec --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/quantization/BUILD @@ -0,0 +1,17 @@ +load("@tflm_pip_deps//:requirements.bzl", "requirement") + +py_binary( + name = "ptq", + srcs = ["ptq.py"], + data = ["//tensorflow/lite/micro/examples/hello_world/models:hello_world_float.tflite"], + python_version = "PY3", + srcs_version = "PY3", + deps = [ + "@absl_py//absl:app", + "@absl_py//absl/flags", + "@absl_py//absl/logging", + requirement("numpy"), + requirement("tensorflow-cpu"), + "//tensorflow/lite/micro/python/interpreter/src:tflm_runtime", + ], +) diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/quantization/ptq.py b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/quantization/ptq.py new file mode 100644 index 000000000..bfab0d0e1 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/quantization/ptq.py @@ -0,0 +1,116 @@ +# Copyright 2023 The 
TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= +"""This script can create a quant(int8) model from the saved TF model. + +Run: +Build the train.py script +`bazel build tensorflow/lite/micro/examples/hello_world/quantization:train` + +The following command first creates the trained TF float model that we will quantize later +`bazel-bin/tensorflow/lite/micro/examples/hello_world/train --save_tf_model --save_dir=/tmp/float_model/` + +Build the ptq.py script +`bazel build tensorflow/lite/micro/examples/hello_world/quantization:ptq` + +Then we can run the ptq script to convert the float model to quant model as follows. +Note that we are using the directory of the TF model as the source_model_dir here. +The quant model (named hello_world_int8.tflite) will be created inside the target_dir. +`bazel-bin/tensorflow/lite/micro/examples/hello_world/quantization/ptq --source_model_dir=/tmp/float_model --target_dir=/tmp/quant_model/` +""" +import math +import os + +from absl import app +from absl import flags +from absl import logging +import numpy as np +import tensorflow as tf + +FLAGS = flags.FLAGS + +flags.DEFINE_string("source_model_dir", "/tmp/float_model/", + "the directory where the trained model can be found.") +flags.DEFINE_string("target_dir", "/tmp/quant_model", + "the directory to save the quant model.") + + +def get_data(): + """ + The code will generate a set of random `x` values + """ + # Generate a uniformly distributed set of random numbers in the range from + # 0 to 2Ï€, which covers a complete sine wave oscillation + x_values = np.random.uniform(low=0, high=2 * math.pi, + size=1000).astype(np.float32) + + # Shuffle the values to guarantee they're not in order + np.random.shuffle(x_values) + + return x_values + + +def save_tflite_model(tflite_model, target_dir, model_name): + """save the converted tflite model + Args: + tflite_model (binary): the converted model in serialized format. + save_dir (str): the save directory + model_name (str): model name to be saved + """ + if not os.path.exists(target_dir): + os.makedirs(target_dir) + save_path = os.path.join(target_dir, model_name) + with open(save_path, "wb") as f: + f.write(tflite_model) + logging.info("Tflite model saved to %s", target_dir) + + +def convert_quantized_tflite_model(source_model_dir, x_values): + """Convert the save TF model to tflite model, then save it as .tflite + flatbuffer format + + Args: + source_model_dir (tf.keras.Model): the trained hello_world flaot Model dir + x_train (numpy.array): list of the training data + + Returns: + The converted model in serialized format. 
+ """ + + # Convert the model to the TensorFlow Lite format with quantization + def representative_dataset(num_samples=500): + for i in range(num_samples): + yield [x_values[i].reshape(1, 1)] + + converter = tf.lite.TFLiteConverter.from_saved_model(source_model_dir) + converter.optimizations = [tf.lite.Optimize.DEFAULT] + converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] + converter.inference_input_type = tf.int8 + converter.inference_output_type = tf.int8 + converter.representative_dataset = representative_dataset + tflite_model = converter.convert() + return tflite_model + + +def main(_): + x_values = get_data() + quantized_tflite_model = convert_quantized_tflite_model( + FLAGS.source_model_dir, x_values) + save_tflite_model(quantized_tflite_model, + FLAGS.target_dir, + model_name="hello_world_int8.tflite") + + +if __name__ == "__main__": + app.run(main) \ No newline at end of file diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/train.py b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/train.py new file mode 100644 index 000000000..3a2322ce9 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/train.py @@ -0,0 +1,141 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= +"""hellow_world model training for sinwave recognition + +Run: +`bazel build tensorflow/lite/micro/examples/hello_world:train` +`bazel-bin/tensorflow/lite/micro/examples/hello_world/train --save_tf_model --save_dir=/tmp/model_created/` +""" +import math +import os + +from absl import app +from absl import flags +from absl import logging +import numpy as np +import tensorflow as tf + +FLAGS = flags.FLAGS + +flags.DEFINE_integer("epochs", 500, "number of epochs to train the model.") +flags.DEFINE_string("save_dir", "/tmp/hello_world_models", + "the directory to save the trained model.") +flags.DEFINE_boolean("save_tf_model", False, + "store the original unconverted tf model.") + + +def get_data(): + """ + The code will generate a set of random `x` values,calculate their sine + values. + """ + # Generate a uniformly distributed set of random numbers in the range from + # 0 to 2Ï€, which covers a complete sine wave oscillation + x_values = np.random.uniform(low=0, high=2 * math.pi, + size=1000).astype(np.float32) + + # Shuffle the values to guarantee they're not in order + np.random.shuffle(x_values) + + # Calculate the corresponding sine values + y_values = np.sin(x_values).astype(np.float32) + + return (x_values, y_values) + + +def create_model() -> tf.keras.Model: + model = tf.keras.Sequential() + + # First layer takes a scalar input and feeds it through 16 "neurons". The + # neurons decide whether to activate based on the 'relu' activation function. 
+ model.add(tf.keras.layers.Dense(16, activation='relu', input_shape=(1, ))) + + # The new second and third layers will help the network learn more complex + # representations + model.add(tf.keras.layers.Dense(16, activation='relu')) + + # Final layer is a single neuron, since we want to output a single value + model.add(tf.keras.layers.Dense(1)) + + # Compile the model using the standard 'adam' optimizer and the mean squared + # error or 'mse' loss function for regression. + model.compile(optimizer='adam', loss='mse', metrics=['mae']) + + return model + + +def convert_tflite_model(model): + """Convert the trained Keras model to a tflite model in .tflite flatbuffer format + Args: + model (tf.keras.Model): the trained hello_world Model + Returns: + The converted model in serialized format. + """ + converter = tf.lite.TFLiteConverter.from_keras_model(model) + tflite_model = converter.convert() + return tflite_model + + +def save_tflite_model(tflite_model, save_dir, model_name): + """save the converted tflite model + Args: + tflite_model (binary): the converted model in serialized format. + save_dir (str): the save directory + model_name (str): model name to be saved + """ + if not os.path.exists(save_dir): + os.makedirs(save_dir) + save_path = os.path.join(save_dir, model_name) + with open(save_path, "wb") as f: + f.write(tflite_model) + logging.info("Tflite model saved to %s", save_dir) + + +def train_model(epochs, x_values, y_values): + """Train keras hello_world model + Args: epochs (int) : number of epochs to train the model + x_values (numpy.array): the training data + y_values (numpy.array): the corresponding sine values + Returns: + tf.keras.Model: A trained keras hello_world model + """ + model = create_model() + model.fit(x_values, + y_values, + epochs=epochs, + validation_split=0.2, + batch_size=64, + verbose=2) + + if FLAGS.save_tf_model: + model.save(FLAGS.save_dir, save_format="tf") + logging.info("TF model saved to %s", FLAGS.save_dir) + + return model + + +def main(_): + x_values, y_values = get_data() + trained_model = train_model(FLAGS.epochs, x_values, y_values) + + # Convert and save the model to .tflite + tflite_model = convert_tflite_model(trained_model) + save_tflite_model(tflite_model, + FLAGS.save_dir, + model_name="hello_world_float.tflite") + + +if __name__ == "__main__": + app.run(main) \ No newline at end of file diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/micro_speech/Makefile.inc b/third_party/tflite-micro/tensorflow/lite/micro/examples/micro_speech/Makefile.inc index 573a4e57c..d2ceab5e3 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/examples/micro_speech/Makefile.inc +++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/micro_speech/Makefile.inc @@ -226,16 +226,22 @@ include $(wildcard $(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/micro_speech CCFLAGS := $(filter-out $(CC_WARNINGS),$(CCFLAGS)) # Test the code for feature generation. -$(eval $(call microlite_test,micro_features_generator_test,\ -$(MICRO_FEATURES_GENERATOR_TEST_SRCS),$(MICRO_FEATURES_GENERATOR_TEST_HDRS),$(MICRO_SPEECH_GENERATOR_INPUTS))) +ifneq ($(TARGET_ARCH), $(filter $(TARGET_ARCH), hifi5 hifi3z)) + $(eval $(call microlite_test,micro_features_generator_test,\ + $(MICRO_FEATURES_GENERATOR_TEST_SRCS),$(MICRO_FEATURES_GENERATOR_TEST_HDRS),$(MICRO_SPEECH_GENERATOR_INPUTS))) +endif # Tests loading and running a speech model.
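As a companion to the sketch above, one way to eyeball the model that the new train.py produces before quantizing it. This is only a sketch, assuming train.py was run with --save_tf_model --save_dir=/tmp/float_model/ as in the ptq.py docstring, so a Keras SavedModel exists at that path.

import numpy as np
import tensorflow as tf

model = tf.keras.models.load_model("/tmp/float_model/")
x_test = np.linspace(0.0, 2.0 * np.pi, num=16, dtype=np.float32).reshape(-1, 1)
y_pred = model.predict(x_test, verbose=0).flatten()
# The small 16-16-1 network should track sin(x) to within a modest margin.
print("max |prediction - sin(x)|:", np.max(np.abs(y_pred - np.sin(x_test.flatten()))))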
$(eval $(call microlite_test,micro_speech_test,\ $(MICRO_SPEECH_TEST_SRCS),$(MICRO_SPEECH_TEST_HDRS),$(MICRO_SPEECH_GENERATOR_INPUTS))) -# Test the code for feature generation. -$(eval $(call microlite_test,simple_features_generator_test,\ -$(SIMPLE_FEATURES_GENERATOR_TEST_SRCS),$(SIMPLE_FEATURES_GENERATOR_TEST_HDRS),$(MICRO_SPEECH_GENERATOR_INPUTS))) +# TODO(b/268568089): This test is taking very long time to finish; causing the +# CI to run for a long time to finish. +ifneq ($(TARGET_ARCH), $(filter $(TARGET_ARCH), hifimini hifi5 hifi3z)) + # Test the code for feature generation. + $(eval $(call microlite_test,simple_features_generator_test,\ + $(SIMPLE_FEATURES_GENERATOR_TEST_SRCS),$(SIMPLE_FEATURES_GENERATOR_TEST_HDRS),$(MICRO_SPEECH_GENERATOR_INPUTS))) +endif # Tests the audio provider module. $(eval $(call microlite_test,audio_provider_test,\ @@ -246,12 +252,16 @@ $(eval $(call microlite_test,audio_provider_mock_test,\ $(AUDIO_PROVIDER_MOCK_TEST_SRCS),$(AUDIO_PROVIDER_MOCK_TEST_HDRS),$(MICRO_SPEECH_GENERATOR_INPUTS))) # Tests the feature provider module. -$(eval $(call microlite_test,feature_provider_test,\ -$(FEATURE_PROVIDER_TEST_SRCS),$(FEATURE_PROVIDER_TEST_HDRS))) +ifneq ($(TARGET_ARCH), hifi3z) + $(eval $(call microlite_test,feature_provider_test,\ + $(FEATURE_PROVIDER_TEST_SRCS),$(FEATURE_PROVIDER_TEST_HDRS))) +endif # Tests the feature provider module using the mock audio provider. -$(eval $(call microlite_test,feature_provider_mock_test,\ -$(FEATURE_PROVIDER_MOCK_TEST_SRCS),$(FEATURE_PROVIDER_MOCK_TEST_HDRS),$(MICRO_SPEECH_GENERATOR_INPUTS))) +ifneq ($(TARGET_ARCH), hifi3z) + $(eval $(call microlite_test,feature_provider_mock_test,\ + $(FEATURE_PROVIDER_MOCK_TEST_SRCS),$(FEATURE_PROVIDER_MOCK_TEST_HDRS),$(MICRO_SPEECH_GENERATOR_INPUTS))) +endif # Tests the command recognizer module. $(eval $(call microlite_test,recognize_commands_test,\ diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/BUILD b/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/BUILD index 069fde48a..9e1a4e60f 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/BUILD +++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/BUILD @@ -30,7 +30,7 @@ py_test( srcs = ["evaluate_test.py"], data = [ "trained_lstm.tflite", - "trained_lstm_quant.tflite", + "trained_lstm_int8.tflite", ":sample_images", ], main = "evaluate_test.py", @@ -43,5 +43,6 @@ py_test( deps = [ ":evaluate", ":train", + "//tensorflow/lite/micro/tools:requantize_flatbuffer", ], ) diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/evaluate.py b/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/evaluate.py index e64abe200..f2fdbf3ed 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/evaluate.py +++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/evaluate.py @@ -36,7 +36,6 @@ "the trained model path.") flags.DEFINE_string("img_path", "/tmp/samples/sample0.jpg", "path for the image to be predicted.") -flags.DEFINE_bool("quantized", False, "if the model is quantized") def read_img(img_path): @@ -62,39 +61,94 @@ def read_img(img_path): return data -def predict_image(interpreter, img_path, quantized=False): +def quantize_input_data(data, input_details): + """quantize the input data using scale and zero point + + Args: + data (np.array in float): input data for the interpreter + input_details : output of get_input_details from the tflm interpreter. 
+ """ + # Get input quantization parameters + data_type = input_details["dtype"] + input_quantization_parameters = input_details["quantization_parameters"] + input_scale, input_zero_point = input_quantization_parameters["scales"][ + 0], input_quantization_parameters["zero_points"][0] + # quantize the input data + data = data / input_scale + input_zero_point + return data.astype(data_type) + + +def dequantize_output_data(data, output_details): + """Dequantize the data + + Args: + data (int8 or int16): integer data that need to be dequantized + output_details : output of get_output_details from the tflm interpreter. + """ + output_quantization_parameters = output_details["quantization_parameters"] + output_scale, output_zero_point = output_quantization_parameters["scales"][ + 0], output_quantization_parameters["zero_points"][0] + # Caveat: tflm_output_quant need to be converted to float to avoid integer overflow during dequantization + # e.g., (tflm_output_quant -output_zero_point) and (tflm_output_quant + (-output_zero_point)) + # can produce different results (int8 calculation) + return output_scale * (data.astype("float") - output_zero_point) + + +def tflm_predict(tflm_interpreter, data): + """Predict using the tflm interpreter + + Args: + tflm_interpreter (Interpreter): TFLM interpreter + data (np.array): data that need to be predicted + + Returns: + prediction (np.array): predicted results from the model using TFLM interpreter + """ + tflm_interpreter.set_input(data, 0) + tflm_interpreter.invoke() + return tflm_interpreter.get_output(0) + + +def predict(interpreter, data): """Use TFLM interpreter to predict a MNIST image Args: interpreter (tflm_runtime.Interpreter): the TFLM python interpreter - img_path (str): path to the image that need to be predicted - input_scale (float): quantization scale for the input tensor. 
Defaults to - 1 (no quantization) - quantized (bool): if the model is quantized + data (np.array): data to be predicted Returns: - np.array : predicted probability for each class (digit 0-9) + np.array : predicted probability (integer version if quantized) for each class (digit 0-9) """ - data = read_img(img_path) - # Quantize the input if necessary - if quantized: - # Get input quantization parameters (0 since input data has only one channel) - input_quantization_parameters = interpreter.get_input_details( - 0)["quantization_parameters"] - input_scale, input_zero_point = input_quantization_parameters["scales"][ - 0], input_quantization_parameters["zero_points"][0] - # quantize the input data - data = data / input_scale + input_zero_point - data = data.astype("int8") + input_details = interpreter.get_input_details(0) + # Quantize the input if the model is quantized + if input_details["dtype"] != np.float32: + data = quantize_input_data(data, input_details) interpreter.set_input(data, 0) interpreter.invoke() tflm_output = interpreter.get_output(0) + # LSTM is stateful, reset the state after the usage since each image is independent interpreter.reset() - # One image per time (i.e., remove the batch dimention) - # Note: quantized output (dtpe int8) is converted to float to avoid integer overflow during dequantization - return tflm_output[0].astype("float") + output_details = interpreter.get_output_details(0) + if output_details["dtype"] == np.float32: + return tflm_output[0].astype("float") + # Dequantize the output for quantized model + return dequantize_output_data(tflm_output[0], output_details) + + +def predict_image(interpreter, image_path): + """Use TFLM interpreter to predict a MNIST image + + Args: + interpreter (tflm_runtime.Interpreter): the TFLM python interpreter + image_path (str): path for the image that need to be tested + + Returns: + np.array : predicted probability (integer version if quantized) for each class (digit 0-9) + """ + data = read_img(image_path) + return predict(interpreter, data) def main(_): @@ -105,8 +159,7 @@ def main(_): raise ValueError("Image file does not exist. 
Please check the image path.") tflm_interpreter = tflm_runtime.Interpreter.from_file(FLAGS.model_path) - category_probabilities = predict_image(tflm_interpreter, FLAGS.img_path, - FLAGS.quantized) + category_probabilities = predict_image(tflm_interpreter, FLAGS.img_path) predicted_category = np.argmax(category_probabilities) logging.info("Model predicts the image as %i with probability %.2f", predicted_category, category_probabilities[predicted_category]) diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/evaluate_test.py b/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/evaluate_test.py index 406e7a868..1092a7852 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/evaluate_test.py +++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/evaluate_test.py @@ -22,18 +22,19 @@ from tensorflow.python.platform import test from tflite_micro.tensorflow.lite.micro.python.interpreter.src import tflm_runtime from tflite_micro.tensorflow.lite.micro.examples.mnist_lstm import evaluate +from tflite_micro.tensorflow.lite.micro.tools import requantize_flatbuffer PREFIX_PATH = resource_loader.get_path_to_datafile("") class LSTMFloatModelTest(test_util.TensorFlowTestCase): - model_path = os.path.join(PREFIX_PATH, "trained_lstm.tflite") - input_shape = (1, 28, 28) - output_shape = (1, 10) - - tflm_interpreter = tflm_runtime.Interpreter.from_file(model_path) - np.random.seed(42) #Seed the random number generator + def setUp(self): + self.model_path = os.path.join(PREFIX_PATH, "trained_lstm.tflite") + self.input_shape = (1, 28, 28) + self.output_shape = (1, 10) + self.tflm_interpreter = tflm_runtime.Interpreter.from_file(self.model_path) + np.random.seed(42) #Seed the random number generator def testInputErrHandling(self): wrong_size_image_path = os.path.join(PREFIX_PATH, "samples/resized9.png") @@ -67,9 +68,7 @@ def testCompareWithTFLite(self): tflite_output_details["index"]) # Run inference on TFLM - self.tflm_interpreter.set_input(data_x, 0) - self.tflm_interpreter.invoke() - tflm_output = self.tflm_interpreter.get_output(0) + tflm_output = evaluate.tflm_predict(self.tflm_interpreter, data_x) # Check that TFLM has correct output self.assertDTypeEqual(tflm_output, np.float32) @@ -89,31 +88,28 @@ def testModelAccuracy(self): self.assertEqual(predicted_category, label) -class LSTMQuantModelTest(test_util.TensorFlowTestCase): - - quant_model_path = os.path.join(PREFIX_PATH, "trained_lstm_quant.tflite") - input_shape = (1, 28, 28) - output_shape = (1, 10) +class LSTMInt8ModelTest(test_util.TensorFlowTestCase): - tflm_interpreter_quant = tflm_runtime.Interpreter.from_file(quant_model_path) - np.random.seed(42) #Seed the random number generator + def setUp(self): + self.int8_model_path = os.path.join(PREFIX_PATH, + "trained_lstm_int8.tflite") + self.input_shape = (1, 28, 28) + self.output_shape = (1, 10) + self.tflm_interpreter_quant = tflm_runtime.Interpreter.from_file( + self.int8_model_path) + np.random.seed(42) #Seed the random number generator def testQuantOutputs(self): - # Get input/output quantization parameters - input_quantization_parameters = self.tflm_interpreter_quant.get_input_details( - 0)["quantization_parameters"] - output_quantization_parameters = self.tflm_interpreter_quant.get_output_details( - 0)["quantization_parameters"] - input_scale, input_zero_point = input_quantization_parameters["scales"][ - 0], input_quantization_parameters["zero_points"][0] - output_scale, output_zero_point = 
output_quantization_parameters["scales"][ - 0], output_quantization_parameters["zero_points"][0] + # Get input/output information of the quantized model + input_details = self.tflm_interpreter_quant.get_input_details(0) + output_details = self.tflm_interpreter_quant.get_output_details(0) + # Create a float model for results comparison float_model_path = os.path.join(PREFIX_PATH, "trained_lstm.tflite") tflm_interpreter_float = tflm_runtime.Interpreter.from_file( float_model_path) - num_test = 100 + num_test = 10 for _ in range(num_test): # Clear the internal states of the TfLite and TFLM interpreters so that we can call invoke multiple times (LSTM is stateful). self.tflm_interpreter_quant.reset() @@ -123,28 +119,21 @@ def testQuantOutputs(self): data_x = data_x.astype("float32") # Run float inference on TFLM - tflm_interpreter_float.set_input(data_x, 0) - tflm_interpreter_float.invoke() - tflm_output_float = tflm_interpreter_float.get_output(0) + tflm_output_float = evaluate.tflm_predict(tflm_interpreter_float, data_x) # Quantized the input data into int8 - data_x_quant = data_x / input_scale + input_zero_point - data_x_quant = data_x_quant.astype("int8") + data_x_quant = evaluate.quantize_input_data(data_x, input_details) # Run integer inference on the quantilzed TFLM model - self.tflm_interpreter_quant.set_input(data_x_quant, 0) - self.tflm_interpreter_quant.invoke() - tflm_output_quant = self.tflm_interpreter_quant.get_output(0) + tflm_output_quant = evaluate.tflm_predict(self.tflm_interpreter_quant, + data_x_quant) # Check shape and type self.assertDTypeEqual(tflm_output_quant, np.int8) self.assertEqual(tflm_output_quant.shape, self.output_shape) # Convert the integer output back to float for comparison - # Caveat: tflm_output_quant need to be converted to float to avoid integer overflow during dequantization - # e.g., (tflm_output_quant -output_zero_point) and (tflm_output_quant + (-output_zero_point)) - # can produce different results (int8 calculation) - tflm_output_quant_float = output_scale * ( - tflm_output_quant.astype("float") - output_zero_point) + tflm_output_quant_float = evaluate.dequantize_output_data( + tflm_output_quant, output_details) # Make sure the difference is within the error margin self.assertAllLess(abs(tflm_output_float - tflm_output_quant_float), 1e-2) @@ -155,7 +144,75 @@ def testQuantModelAccuracy(self): # Run integer inference (quantized) on the sample image # Note that the TFLM state is reset inside the predict_image function. 
category_probabilities_quant = evaluate.predict_image( - self.tflm_interpreter_quant, image_path, quantized=True) + self.tflm_interpreter_quant, image_path) + # Check the prediction result + predicted_category = np.argmax(category_probabilities_quant) + # Check the prediction + self.assertEqual(predicted_category, label) + + +class LSTMInt16ModelTest(test_util.TensorFlowTestCase): + + def setUp(self): + # Convert the int8 model to int16 + self.int8_model_path = os.path.join(PREFIX_PATH, + "trained_lstm_int8.tflite") + self.requantizer = requantize_flatbuffer.Requantizer.from_file( + self.int8_model_path) + self.requantizer.requantize_8to16() + self.int16_model = self.requantizer.model_bytearray() + self.input_shape = (1, 28, 28) + self.output_shape = (1, 10) + self.tflm_interpreter_quant = tflm_runtime.Interpreter.from_bytes( + self.int16_model) + np.random.seed(42) #Seed the random number generator + + def testQuantOutputs(self): + # Get input/output information + input_details = self.tflm_interpreter_quant.get_input_details(0) + output_details = self.tflm_interpreter_quant.get_output_details(0) + + # Create a float model for results comparison + float_model_path = os.path.join(PREFIX_PATH, "trained_lstm.tflite") + tflm_interpreter_float = tflm_runtime.Interpreter.from_file( + float_model_path) + + num_test = 10 + for _ in range(num_test): + # Clear the internal states of the TfLite and TFLM interpreters so that we can call invoke multiple times (LSTM is stateful). + self.tflm_interpreter_quant.reset() + tflm_interpreter_float.reset() + + data_x = np.random.random(self.input_shape) + data_x = data_x.astype("float32") + + # Run float inference on TFLM + tflm_output_float = evaluate.tflm_predict(tflm_interpreter_float, data_x) + + # Quantized the input data into int8 + data_x_quant = evaluate.quantize_input_data(data_x, input_details) + + # Run integer inference on the quantilzed TFLM model + tflm_output_quant = evaluate.tflm_predict(self.tflm_interpreter_quant, + data_x_quant) + # Check shape and type + self.assertDTypeEqual(tflm_output_quant, np.int16) + self.assertEqual(tflm_output_quant.shape, self.output_shape) + + # Convert the integer output back to float for comparison + tflm_output_quant_float = evaluate.dequantize_output_data( + tflm_output_quant, output_details) + # Make sure the difference is within the error margin + self.assertAllLess(abs(tflm_output_float - tflm_output_quant_float), + 1e-3) + + def testQuantModelAccuracy(self): + for label in range(10): + image_path = os.path.join(PREFIX_PATH, f"samples/sample{label}.png") + # Run integer inference (quantized) on the sample image + # Note that the TFLM state is reset inside the predict_image function. 
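Stepping outside the test for a moment: the int8-to-int16 flow exercised by LSTMInt16ModelTest can also be driven on its own. The following is a sketch reusing only the modules and functions imported in evaluate_test.py above (Requantizer.from_file, requantize_8to16, model_bytearray, Interpreter.from_bytes, evaluate.predict_image); the file paths are illustrative.

import numpy as np
from tflite_micro.tensorflow.lite.micro.python.interpreter.src import tflm_runtime
from tflite_micro.tensorflow.lite.micro.tools import requantize_flatbuffer
from tflite_micro.tensorflow.lite.micro.examples.mnist_lstm import evaluate

# Requantize the shipped int8 LSTM model to int16 in memory.
requantizer = requantize_flatbuffer.Requantizer.from_file("trained_lstm_int8.tflite")
requantizer.requantize_8to16()
interpreter = tflm_runtime.Interpreter.from_bytes(requantizer.model_bytearray())

# predict_image quantizes the input, invokes, resets the LSTM state, and
# dequantizes the output, so the caller only sees float probabilities.
probabilities = evaluate.predict_image(interpreter, "samples/sample7.png")
print("predicted digit:", np.argmax(probabilities))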
+ category_probabilities_quant = evaluate.predict_image( + self.tflm_interpreter_quant, image_path) # Check the prediction result predicted_category = np.argmax(category_probabilities_quant) # Check the prediction diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/trained_lstm_int8.tflite b/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/trained_lstm_int8.tflite new file mode 100644 index 000000000..636ea0bbe Binary files /dev/null and b/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/trained_lstm_int8.tflite differ diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/person_detection/Makefile.inc b/third_party/tflite-micro/tensorflow/lite/micro/examples/person_detection/Makefile.inc index 0cbca9e84..c142c7ddc 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/examples/person_detection/Makefile.inc +++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/person_detection/Makefile.inc @@ -54,9 +54,13 @@ $(GENERATED_SRCS_DIR)$(TENSORFLOW_ROOT)tensorflow/lite/micro/models/person_detec #Find any platform - specific rules for this example. include $(wildcard $(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/person_detection/*/Makefile.inc) -# Tests loading and running a vision model. -$(eval $(call microlite_test,person_detection_test,\ -$(person_detection_TEST_SRCS),$(person_detection_TEST_HDRS),$(person_detection_GENERATOR_INPUTS))) +# TODO(b/268568089): This test is taking very long time to finish; causing the +# CI to run for a long time to finish. +ifneq ($(TARGET_ARCH), $(filter $(TARGET_ARCH), hifimini hifi3z)) + # Tests loading and running a vision model. + $(eval $(call microlite_test,person_detection_test,\ + $(person_detection_TEST_SRCS),$(person_detection_TEST_HDRS),$(person_detection_GENERATOR_INPUTS))) +endif # Tests the image provider module. $(eval $(call microlite_test,image_provider_test,\ diff --git a/third_party/tflite-micro/tensorflow/lite/micro/fake_micro_context.cc b/third_party/tflite-micro/tensorflow/lite/micro/fake_micro_context.cc index 81f74ae36..03ea6dfc7 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/fake_micro_context.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/fake_micro_context.cc @@ -39,16 +39,26 @@ FakeMicroContext::FakeMicroContext(TfLiteTensor* tensors, allocator_(allocator) {} TfLiteTensor* FakeMicroContext::AllocateTempTfLiteTensor(int tensor_index) { - allocated_tensor_count_++; + allocated_temp_count_++; return &tensors_[tensor_index]; } void FakeMicroContext::DeallocateTempTfLiteTensor(TfLiteTensor* tensor) { - allocated_tensor_count_--; + allocated_temp_count_--; } bool FakeMicroContext::IsAllTempTfLiteTensorDeallocated() { - return !allocated_tensor_count_; + return !allocated_temp_count_; +} + +uint8_t* FakeMicroContext::AllocateTempBuffer(size_t size, size_t alignment) { + allocated_temp_count_++; + return allocator_->AllocateTemp(size, alignment); +} + +void FakeMicroContext::DeallocateTempBuffer(uint8_t* buffer) { + allocated_temp_count_--; + allocator_->DeallocateTemp(buffer); } TfLiteEvalTensor* FakeMicroContext::GetEvalTensor(int tensor_index) { diff --git a/third_party/tflite-micro/tensorflow/lite/micro/fake_micro_context.h b/third_party/tflite-micro/tensorflow/lite/micro/fake_micro_context.h index 31b39d384..b068f3263 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/fake_micro_context.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/fake_micro_context.h @@ -21,6 +21,10 @@ limitations under the License. 
namespace tflite { // A fake of MicroContext for kernel util tests. +// TODO(b/272759060): FakeMicroContext currently inherits from MicroContext. +// Which allow tests to use functions from MicroContext that weren't added to +// FakeMicroContext in tests. This should be looked into further. + class FakeMicroContext : public MicroContext { public: FakeMicroContext(TfLiteTensor* tensors, SingleArenaBufferAllocator* allocator, @@ -35,6 +39,9 @@ class FakeMicroContext : public MicroContext { void DeallocateTempTfLiteTensor(TfLiteTensor* tensor) override; bool IsAllTempTfLiteTensorDeallocated(); + uint8_t* AllocateTempBuffer(size_t size, size_t alignment) override; + void DeallocateTempBuffer(uint8_t* buffer) override; + TfLiteEvalTensor* GetEvalTensor(int tensor_index) override; private: @@ -44,7 +51,7 @@ class FakeMicroContext : public MicroContext { uint8_t* scratch_buffers_[kNumScratchBuffers_]; TfLiteTensor* tensors_; - int allocated_tensor_count_ = 0; + int allocated_temp_count_ = 0; SingleArenaBufferAllocator* allocator_; diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/activations.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/activations.cc index 716dd6fc7..3227ffbf0 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/activations.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/activations.cc @@ -109,11 +109,11 @@ TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace -TfLiteRegistration Register_RELU() { +TfLiteRegistration_V1 Register_RELU() { return tflite::micro::RegisterOp(ReluInit, ReluPrepare, ReluEval); } -TfLiteRegistration Register_RELU6() { +TfLiteRegistration_V1 Register_RELU6() { return tflite::micro::RegisterOp(Relu6Init, Relu6Prepare, Relu6Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/activations_common.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/activations_common.cc index 4403edc87..2ec3a1bf5 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/activations_common.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/activations_common.cc @@ -55,8 +55,8 @@ void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output, ReluOpData* data) { float act_min = 0.0; float act_max = std::numeric_limits::infinity(); - double real_multiplier = static_cast(input->params.scale) / - static_cast(output->params.scale); + double real_multiplier = + static_cast(input->params.scale / output->params.scale); const RuntimeShape input_shape = GetTensorShape(input); const RuntimeShape output_shape = GetTensorShape(output); diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/add.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/add.cc index 81b3b9c9b..d3947bfaf 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/add.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/add.cc @@ -15,6 +15,8 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/reference/add.h" +#include + #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/quantization_util.h" @@ -30,28 +32,60 @@ limitations under the License. 
namespace tflite { -void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params, - const OpDataAdd* data, const TfLiteEvalTensor* input1, - const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) { - tflite::ArithmeticParams op_params; - SetActivationParams(data->output_activation_min_f32, - data->output_activation_max_f32, &op_params); - if (data->requires_broadcast) { - reference_ops::BroadcastAdd4DSlow( - op_params, tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - } else { - reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); +TfLiteStatus EvalAdd(TfLiteContext* context, TfLiteNode* node, + TfLiteAddParams* params, const OpDataAdd* data, + const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) { + switch (output->type) { + case kTfLiteFloat32: { + tflite::ArithmeticParams op_params; + SetActivationParams(data->output_activation_min_f32, + data->output_activation_max_f32, &op_params); + if (data->requires_broadcast) { + reference_ops::BroadcastAdd4DSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else { + reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } + } break; + case kTfLiteInt32: { + tflite::ArithmeticParams op_params; + SetActivationParams(std::numeric_limits::lowest(), + std::numeric_limits::max(), &op_params); + if (data->requires_broadcast) { + reference_ops::BroadcastAdd4DSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else { + reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } + } break; + default: + MicroPrintf("Type %s (%d) not supported.", + TfLiteTypeGetName(output->type), output->type); + return kTfLiteError; } + + return kTfLiteOk; } TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node, @@ -144,8 +178,9 @@ TfLiteStatus AddEval(TfLiteContext* context, TfLiteNode* node) { TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, kAddOutputTensor); - if (output->type == kTfLiteFloat32) { - EvalAdd(context, node, params, data, input1, input2, output); + if (output->type == kTfLiteFloat32 || output->type == kTfLiteInt32) { + TF_LITE_ENSURE_OK( + context, EvalAdd(context, node, params, data, input1, input2, output)); } else if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) { 
TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, data, input1, input2, output)); @@ -158,7 +193,7 @@ TfLiteStatus AddEval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -TfLiteRegistration Register_ADD() { +TfLiteRegistration_V1 Register_ADD() { return tflite::micro::RegisterOp(AddInit, AddPrepare, AddEval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/add.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/add.h index e2e5d23ba..0c902af75 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/add.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/add.h @@ -60,17 +60,17 @@ TfLiteStatus CalculateOpDataAdd(TfLiteContext* context, TfLiteAddParams* params, TfLiteStatus AddPrepare(TfLiteContext* context, TfLiteNode* node); // Generic must define registration function. -TfLiteRegistration Register_ADD(); +TfLiteRegistration_V1 Register_ADD(); #if defined(CMSIS_NN) -TfLiteRegistration Register_ADD_INT8(); +TfLiteRegistration_V1 Register_ADD_INT8(); -TfLiteRegistration Register_ADD_INT16(); +TfLiteRegistration_V1 Register_ADD_INT16(); #else // Fallback registration -inline TfLiteRegistration Register_ADD_INT8() { return Register_ADD(); } +inline TfLiteRegistration_V1 Register_ADD_INT8() { return Register_ADD(); } -inline TfLiteRegistration Register_ADD_INT16() { return Register_ADD(); } +inline TfLiteRegistration_V1 Register_ADD_INT16() { return Register_ADD(); } #endif } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/add_common.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/add_common.cc index b285b800c..cc9450913 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/add_common.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/add_common.cc @@ -39,6 +39,8 @@ TfLiteStatus CalculateOpDataAdd(TfLiteContext* context, TfLiteAddParams* params, data->requires_broadcast = !HaveSameShapes(input1, input2); if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) { + TFLITE_CHECK_NE(output->quantization.type, kTfLiteNoQuantization); + // 8bit -> 8bit general quantized path, with general rescalings data->input1_offset = -input1->params.zero_point; data->input2_offset = -input2->params.zero_point; @@ -97,6 +99,14 @@ TfLiteStatus AddPrepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_STATUS( CalculateOpDataAdd(context, params, input1, input2, output, data)); + if (output->type == kTfLiteInt32) { + // Only support int32 unquantized add for now. 
+ TF_LITE_ENSURE_EQ(context, input1->quantization.type, + kTfLiteNoQuantization); + TF_LITE_ENSURE_EQ(context, input2->quantization.type, + kTfLiteNoQuantization); + } + micro_context->DeallocateTempTfLiteTensor(input1); micro_context->DeallocateTempTfLiteTensor(input2); micro_context->DeallocateTempTfLiteTensor(output); diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/add_n.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/add_n.cc index 1139e1a95..eea554be5 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/add_n.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/add_n.cc @@ -208,7 +208,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace -TfLiteRegistration Register_ADD_N() { +TfLiteRegistration_V1 Register_ADD_N() { return tflite::micro::RegisterOp(nullptr, Prepare, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/arg_min_max.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/arg_min_max.cc index 7c78e475f..c38c19b36 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/arg_min_max.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/arg_min_max.cc @@ -107,11 +107,11 @@ TfLiteStatus ArgMaxEval(TfLiteContext* context, TfLiteNode* node) { } // namespace -TfLiteRegistration Register_ARG_MAX() { +TfLiteRegistration_V1 Register_ARG_MAX() { return tflite::micro::RegisterOp(nullptr, nullptr, ArgMaxEval); } -TfLiteRegistration Register_ARG_MIN() { +TfLiteRegistration_V1 Register_ARG_MIN() { return tflite::micro::RegisterOp(nullptr, nullptr, ArgMinEval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/assign_variable.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/assign_variable.cc index f3aa12fa4..d59dd5642 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/assign_variable.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/assign_variable.cc @@ -94,7 +94,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace. -TfLiteRegistration Register_ASSIGN_VARIABLE() { +TfLiteRegistration_V1 Register_ASSIGN_VARIABLE() { return tflite::micro::RegisterOp(nullptr, Prepare, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/batch_to_space_nd.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/batch_to_space_nd.cc index 83fb35688..29ca2ff94 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/batch_to_space_nd.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/batch_to_space_nd.cc @@ -105,7 +105,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace. 
-TfLiteRegistration Register_BATCH_TO_SPACE_ND() { +TfLiteRegistration_V1 Register_BATCH_TO_SPACE_ND() { return tflite::micro::RegisterOp(nullptr, Prepare, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/broadcast_args.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/broadcast_args.cc index be2672ec9..a526971ce 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/broadcast_args.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/broadcast_args.cc @@ -83,7 +83,7 @@ TfLiteStatus BroadcastArgsEval(TfLiteContext* context, TfLiteNode* node) { } // namespace -TfLiteRegistration Register_BROADCAST_ARGS() { +TfLiteRegistration_V1 Register_BROADCAST_ARGS() { return tflite::micro::RegisterOp(nullptr, BroadcastArgsPrepare, BroadcastArgsEval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/broadcast_to.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/broadcast_to.cc index 63a14db25..9a32331f5 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/broadcast_to.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/broadcast_to.cc @@ -115,7 +115,7 @@ TfLiteStatus BroadcastToEval(TfLiteContext* context, TfLiteNode* node) { } } // namespace -TfLiteRegistration Register_BROADCAST_TO() { +TfLiteRegistration_V1 Register_BROADCAST_TO() { return tflite::micro::RegisterOp(nullptr, BroadcastToPrepare, BroadcastToEval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/call_once.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/call_once.cc index 200242b2c..9fdf7d05e 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/call_once.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/call_once.cc @@ -81,7 +81,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace. -TfLiteRegistration Register_CALL_ONCE() { +TfLiteRegistration_V1 Register_CALL_ONCE() { return tflite::micro::RegisterOp(Init, Prepare, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/cast.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/cast.cc index 0a0204d2e..6dd20d1fd 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/cast.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/cast.cc @@ -107,7 +107,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } } // namespace -TfLiteRegistration Register_CAST() { +TfLiteRegistration_V1 Register_CAST() { return tflite::micro::RegisterOp(nullptr, Prepare, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/ceil.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/ceil.cc index a390a7355..5716afef3 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/ceil.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/ceil.cc @@ -1,4 +1,4 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -21,9 +21,8 @@ limitations under the License. 
#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { -namespace ops { -namespace micro { -namespace ceil { + +namespace { constexpr int kInputTensor = 0; constexpr int kOutputTensor = 0; @@ -64,12 +63,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -} // namespace ceil -TfLiteRegistration Register_CEIL() { - return tflite::micro::RegisterOp(nullptr, ceil::Prepare, ceil::Eval); +} // namespace + +TfLiteRegistration_V1 Register_CEIL() { + return tflite::micro::RegisterOp(nullptr, Prepare, Eval); } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/circular_buffer.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/circular_buffer.cc index 9779c32d9..e598fc5ad 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/circular_buffer.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/circular_buffer.cc @@ -108,8 +108,8 @@ TfLiteStatus CircularBufferEval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -TfLiteRegistration* Register_CIRCULAR_BUFFER() { - static TfLiteRegistration r = tflite::micro::RegisterOp( +TfLiteRegistration_V1* Register_CIRCULAR_BUFFER() { + static TfLiteRegistration_V1 r = tflite::micro::RegisterOp( CircularBufferInit, CircularBufferPrepare, CircularBufferEval); return &r; } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/comparisons.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/comparisons.cc index 597856cd2..76a820a87 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/comparisons.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/comparisons.cc @@ -1,4 +1,4 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -22,9 +22,7 @@ limitations under the License. 
#include "tensorflow/lite/micro/micro_log.h" namespace tflite { -namespace ops { -namespace micro { -namespace comparisons { + namespace { struct OpData { @@ -530,8 +528,6 @@ TfLiteStatus LessEqualEval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -} // namespace - void* Init(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); return context->AllocatePersistentBuffer(context, sizeof(OpData)); @@ -581,38 +577,30 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -} // namespace comparisons +} // namespace -TfLiteRegistration Register_EQUAL() { - return tflite::micro::RegisterOp(comparisons::Init, comparisons::Prepare, - comparisons::EqualEval); +TfLiteRegistration_V1 Register_EQUAL() { + return tflite::micro::RegisterOp(Init, Prepare, EqualEval); } -TfLiteRegistration Register_NOT_EQUAL() { - return tflite::micro::RegisterOp(comparisons::Init, comparisons::Prepare, - comparisons::NotEqualEval); +TfLiteRegistration_V1 Register_NOT_EQUAL() { + return tflite::micro::RegisterOp(Init, Prepare, NotEqualEval); } -TfLiteRegistration Register_GREATER() { - return tflite::micro::RegisterOp(comparisons::Init, comparisons::Prepare, - comparisons::GreaterEval); +TfLiteRegistration_V1 Register_GREATER() { + return tflite::micro::RegisterOp(Init, Prepare, GreaterEval); } -TfLiteRegistration Register_GREATER_EQUAL() { - return tflite::micro::RegisterOp(comparisons::Init, comparisons::Prepare, - comparisons::GreaterEqualEval); +TfLiteRegistration_V1 Register_GREATER_EQUAL() { + return tflite::micro::RegisterOp(Init, Prepare, GreaterEqualEval); } -TfLiteRegistration Register_LESS() { - return tflite::micro::RegisterOp(comparisons::Init, comparisons::Prepare, - comparisons::LessEval); +TfLiteRegistration_V1 Register_LESS() { + return tflite::micro::RegisterOp(Init, Prepare, LessEval); } -TfLiteRegistration Register_LESS_EQUAL() { - return tflite::micro::RegisterOp(comparisons::Init, comparisons::Prepare, - comparisons::LessEqualEval); +TfLiteRegistration_V1 Register_LESS_EQUAL() { + return tflite::micro::RegisterOp(Init, Prepare, LessEqualEval); } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/concatenation.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/concatenation.cc index a3f1cc346..9decf7279 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/concatenation.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/concatenation.cc @@ -1,4 +1,4 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -26,9 +26,8 @@ limitations under the License. 
#include "tensorflow/lite/micro/micro_log.h" namespace tflite { -namespace ops { -namespace micro { -namespace concatenation { + +namespace { constexpr int kMaxInputNum = 10; // Maximum number of input tensors constexpr int kOutputTensor = 0; @@ -251,13 +250,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -} // namespace concatenation +} // namespace -TfLiteRegistration Register_CONCATENATION() { - return tflite::micro::RegisterOp(concatenation::Init, concatenation::Prepare, - concatenation::Eval); +TfLiteRegistration_V1 Register_CONCATENATION() { + return tflite::micro::RegisterOp(Init, Prepare, Eval); } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv.cc index 139eda7f5..edd1bd8a7 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv.cc @@ -17,6 +17,7 @@ limitations under the License. #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" #include "tensorflow/lite/kernels/internal/reference/conv.h" #include "tensorflow/lite/kernels/internal/reference/integer_ops/conv.h" #include "tensorflow/lite/kernels/kernel_util.h" @@ -113,14 +114,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { case kTfLiteInt4: { int8_t* unpacked_filter_data = static_cast( context->GetScratchBuffer(context, data.filter_buffer_index)); - reference_integer_ops::ConvPerChannelWithPackedInt4Weights( + tflite::tensor_utils::UnpackDenseInt4IntoInt8( + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(filter).FlatSize(), + unpacked_filter_data); + reference_integer_ops::ConvPerChannel( ConvParamsQuantized(params, data), data.per_channel_output_multiplier, data.per_channel_output_shift, tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(filter), - tflite::micro::GetTensorData(filter), - unpacked_filter_data, tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorShape(filter), unpacked_filter_data, + tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output)); @@ -157,7 +161,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace -TfLiteRegistration Register_CONV_2D() { +TfLiteRegistration_V1 Register_CONV_2D() { return tflite::micro::RegisterOp(Init, ConvPrepare, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv.h index 06e9db43a..d65457d41 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv.h @@ -76,37 +76,39 @@ TfLiteStatus CalculateOpDataConv(TfLiteContext* context, TfLiteNode* node, TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node); -// This is the most generic TfLiteRegistration. The actual supported types may -// still be target dependent. The only requirement is that every implementation -// (reference or optimized) must define this function. -TfLiteRegistration Register_CONV_2D(); +// This is the most generic TfLiteRegistration_V1. The actual supported types +// may still be target dependent. 
The only requirement is that every +// implementation (reference or optimized) must define this function. +TfLiteRegistration_V1 Register_CONV_2D(); #if defined(XTENSA) -// Returns a TfLiteRegistration struct for kernel variant that only supports +// Returns a TfLiteRegistration_V1 struct for kernel variant that only supports // int8 activations and int8 weights and always calls the reference // implementation. -TfLiteRegistration Register_CONV_2D_INT8REF(); +TfLiteRegistration_V1 Register_CONV_2D_INT8REF(); #else -inline TfLiteRegistration Register_CONV_2D_INT8REF() { +inline TfLiteRegistration_V1 Register_CONV_2D_INT8REF() { return Register_CONV_2D(); } #endif #if defined(CMSIS_NN) -// Returns a TfLiteRegistration struct for kernel variant that only supports +// Returns a TfLiteRegistration_V1 struct for kernel variant that only supports // int8 activations and int8 weights and uses the latency optimized // implementations. -TfLiteRegistration Register_CONV_2D_INT8(); +TfLiteRegistration_V1 Register_CONV_2D_INT8(); -// Returns a TfLiteRegistration struct for kernel variant that only supports +// Returns a TfLiteRegistration_V1 struct for kernel variant that only supports // int16 activations and int8 weights and uses the latency optimized // implementations. -TfLiteRegistration Register_CONV_2D_INT16(); +TfLiteRegistration_V1 Register_CONV_2D_INT16(); #else -inline TfLiteRegistration Register_CONV_2D_INT8() { return Register_CONV_2D(); } +inline TfLiteRegistration_V1 Register_CONV_2D_INT8() { + return Register_CONV_2D(); +} -inline TfLiteRegistration Register_CONV_2D_INT16() { +inline TfLiteRegistration_V1 Register_CONV_2D_INT16() { return Register_CONV_2D(); } #endif diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv_test.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv_test.cc index c5519b544..98c2615db 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv_test.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv_test.cc @@ -61,6 +61,10 @@ static TfLiteConvParams common_conv_params = { TF_LITE_MICRO_TESTS_BEGIN +#if !defined(VISION_P6) // TODO(b/270720625): disabled int8 and int4 test for +// conv for fully connected vision p6 kernels, because vision p6 conv doesn't +// work with per channel quantization + TF_LITE_MICRO_TEST(SimpleTestQuantized4bitPerChannel) { const int output_dims_count = 12; int8_t output_data[output_dims_count]; @@ -90,6 +94,39 @@ TF_LITE_MICRO_TEST(SimpleTestQuantized4bitPerChannel) { &tflite::testing::common_conv_params, tflite::Register_CONV_2D(), output_data, kTfLiteInt4)); } + +TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannel) { + const int output_dims_count = 12; + int8_t output_data[output_dims_count]; + + const float input_scale = 0.5f; + const float output_scale = 1.0f; + const int input_zero_point = 0; + const int output_zero_point = 0; + + int8_t input_quantized[tflite::testing::kInputElements]; + int8_t filter_quantized[tflite::testing::kFilterElements]; + int32_t bias_quantized[tflite::testing::kBiasElements]; + int8_t golden_quantized[tflite::testing::kOutputElements]; + int zero_points[tflite::testing::kBiasElements + 1]; + float scales[tflite::testing::kBiasElements + 1]; + + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, + tflite::testing::TestConvQuantizedPerChannel( + tflite::testing::kInputShape, tflite::testing::kInputData, + input_quantized, input_scale, input_zero_point, + tflite::testing::kFilterShape, tflite::testing::kFilterData, + filter_quantized, 
tflite::testing::kBiasShape, + tflite::testing::kBiasData, bias_quantized, scales, zero_points, + tflite::testing::kOutputShape, tflite::testing::kGoldenData, + golden_quantized, output_scale, output_zero_point, + &tflite::testing::common_conv_params, tflite::Register_CONV_2D(), + output_data)); +} + +#endif // !defined(VISION_P6) + #if !defined(XTENSA) // TODO(b/170321206): xtensa kernels are less general than // reference kernels and we ifdef out test cases that are // currently known to fail. @@ -188,36 +225,6 @@ TF_LITE_MICRO_TEST(HybridModeIsError) { tflite::Register_CONV_2D(), output_data)); } -TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannel) { - const int output_dims_count = 12; - int8_t output_data[output_dims_count]; - - const float input_scale = 0.5f; - const float output_scale = 1.0f; - const int input_zero_point = 0; - const int output_zero_point = 0; - - int8_t input_quantized[tflite::testing::kInputElements]; - int8_t filter_quantized[tflite::testing::kFilterElements]; - int32_t bias_quantized[tflite::testing::kBiasElements]; - int8_t golden_quantized[tflite::testing::kOutputElements]; - int zero_points[tflite::testing::kBiasElements + 1]; - float scales[tflite::testing::kBiasElements + 1]; - - TF_LITE_MICRO_EXPECT_EQ( - kTfLiteOk, - tflite::testing::TestConvQuantizedPerChannel( - tflite::testing::kInputShape, tflite::testing::kInputData, - input_quantized, input_scale, input_zero_point, - tflite::testing::kFilterShape, tflite::testing::kFilterData, - filter_quantized, tflite::testing::kBiasShape, - tflite::testing::kBiasData, bias_quantized, scales, zero_points, - tflite::testing::kOutputShape, tflite::testing::kGoldenData, - golden_quantized, output_scale, output_zero_point, - &tflite::testing::common_conv_params, tflite::Register_CONV_2D(), - output_data)); -} - TF_LITE_MICRO_TEST(SimpleTestQuantized16x8PerChannel64bBias) { const int output_dims_count = 12; int16_t output_data[output_dims_count]; diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv_test.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv_test.h index aa7ea4436..5ea0261e3 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv_test.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv_test.h @@ -28,35 +28,37 @@ namespace testing { TfLiteStatus InvokeConv(TfLiteTensor* tensors, int tensors_size, int output_length, TfLiteConvParams* conv_params, - TfLiteRegistration registration, float* output_data); + TfLiteRegistration_V1 registration, float* output_data); TfLiteStatus InvokeConv(TfLiteTensor* tensors, int tensors_size, int output_length, TfLiteConvParams* conv_params, - TfLiteRegistration registration, int8_t* output_data); + TfLiteRegistration_V1 registration, + int8_t* output_data); TfLiteStatus InvokeConv(TfLiteTensor* tensors, int tensors_size, int output_length, TfLiteConvParams* conv_params, - TfLiteRegistration registration, uint8_t* output_data); + TfLiteRegistration_V1 registration, + uint8_t* output_data); TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size, const float* expected_output_data, int output_length, TfLiteConvParams* conv_params, - TfLiteRegistration registration, + TfLiteRegistration_V1 registration, float* output_data, float tolerance = 1e-5); TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size, const int8_t* expected_output_data, int output_length, TfLiteConvParams* conv_params, - TfLiteRegistration registration, + TfLiteRegistration_V1 registration, int8_t* output_data, float 
tolerance = 1e-5); TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size, const uint8_t* expected_output_data, int output_length, TfLiteConvParams* conv_params, - TfLiteRegistration registration, + TfLiteRegistration_V1 registration, uint8_t* output_data, float tolerance = 1e-5); TfLiteStatus TestConvFloat(int* input_dims_data, const float* input_data, @@ -65,7 +67,8 @@ TfLiteStatus TestConvFloat(int* input_dims_data, const float* input_data, int* output_dims_data, const float* expected_output_data, TfLiteConvParams* conv_params, - TfLiteRegistration registration, float* output_data); + TfLiteRegistration_V1 registration, + float* output_data); TfLiteStatus TestConvQuantizedPerLayer( int* input_dims_data, const float* input_data, uint8_t* input_quantized, @@ -74,7 +77,7 @@ TfLiteStatus TestConvQuantizedPerLayer( const float* bias_data, int32_t* bias_quantized, int* output_dims_data, const float* expected_output_data, uint8_t* expected_output_quantized, float output_scale, TfLiteConvParams* conv_params, - TfLiteRegistration registration, uint8_t* output_data); + TfLiteRegistration_V1 registration, uint8_t* output_data); TfLiteStatus TestConvQuantizedPerChannel( int* input_dims_data, const float* input_data, int8_t* input_quantized, @@ -84,7 +87,7 @@ TfLiteStatus TestConvQuantizedPerChannel( float* bias_scales, int* bias_zero_points, int* output_dims_data, const float* expected_output_data, int8_t* expected_output_data_quantized, float output_scale, int output_zero_point, TfLiteConvParams* conv_params, - TfLiteRegistration registration, int8_t* output_data, + TfLiteRegistration_V1 registration, int8_t* output_data, TfLiteType tensor_weight_type = kTfLiteNoType); TfLiteStatus TestConvQuantizedPerChannel( @@ -96,7 +99,7 @@ TfLiteStatus TestConvQuantizedPerChannel( int* bias_zero_points, int* output_dims_data, const float* expected_output_data, int16_t* expected_output_data_quantized, float output_scale, int output_zero_point, TfLiteConvParams* conv_params, - TfLiteRegistration registration, int16_t* output_data); + TfLiteRegistration_V1 registration, int16_t* output_data); TfLiteStatus TestConvQuantizedPerChannel( int* input_dims_data, const float* input_data, int16_t* input_quantized, @@ -106,7 +109,7 @@ TfLiteStatus TestConvQuantizedPerChannel( float* bias_scales, int* bias_zero_points, int* output_dims_data, const float* expected_output_data, int16_t* expected_output_data_quantized, float output_scale, int output_zero_point, TfLiteConvParams* conv_params, - TfLiteRegistration registration, int16_t* output_data); + TfLiteRegistration_V1 registration, int16_t* output_data); } // namespace testing } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv_test_common.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv_test_common.cc index 11dc8118c..c925dee33 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv_test_common.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv_test_common.cc @@ -21,7 +21,7 @@ namespace testing { template TfLiteStatus InvokeConv(TfLiteTensor* tensors, int tensors_size, int output_length, TfLiteConvParams* conv_params, - TfLiteRegistration registration, T* output_data) { + TfLiteRegistration_V1 registration, T* output_data) { int inputs_array_data[] = {3, 0, 1, 2}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 3}; @@ -43,7 +43,7 @@ TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size, const 
T* expected_output_data, int output_length, TfLiteConvParams* conv_params, - TfLiteRegistration registration, + TfLiteRegistration_V1 registration, T* output_data, float tolerance) { TfLiteStatus status = InvokeConv(tensors, tensors_size, output_length, conv_params, registration, output_data); @@ -59,14 +59,16 @@ TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size, TfLiteStatus InvokeConv(TfLiteTensor* tensors, int tensors_size, int output_length, TfLiteConvParams* conv_params, - TfLiteRegistration registration, float* output_data) { + TfLiteRegistration_V1 registration, + float* output_data) { return InvokeConv(tensors, tensors_size, output_length, conv_params, registration, output_data); } TfLiteStatus InvokeConv(TfLiteTensor* tensors, int tensors_size, int output_length, TfLiteConvParams* conv_params, - TfLiteRegistration registration, int8_t* output_data) { + TfLiteRegistration_V1 registration, + int8_t* output_data) { return InvokeConv(tensors, tensors_size, output_length, conv_params, registration, output_data); } @@ -75,7 +77,7 @@ TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size, const float* expected_output_data, int output_length, TfLiteConvParams* conv_params, - TfLiteRegistration registration, + TfLiteRegistration_V1 registration, float* output_data, float tolerance) { return ValidateConvGoldens(tensors, tensors_size, expected_output_data, output_length, conv_params, registration, @@ -86,7 +88,7 @@ TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size, const int8_t* expected_output_data, int output_length, TfLiteConvParams* conv_params, - TfLiteRegistration registration, + TfLiteRegistration_V1 registration, int8_t* output_data, float tolerance) { return ValidateConvGoldens( tensors, tensors_size, expected_output_data, output_length, conv_params, @@ -99,7 +101,7 @@ TfLiteStatus TestConvFloat(int* input_dims_data, const float* input_data, int* output_dims_data, const float* expected_output_data, TfLiteConvParams* conv_params, - TfLiteRegistration registration, + TfLiteRegistration_V1 registration, float* output_data) { TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data); @@ -130,7 +132,7 @@ TfLiteStatus TestConvQuantizedPerChannel( float* bias_scales, int* bias_zero_points, int* output_dims_data, const float* expected_output_data, T* expected_output_data_quantized, float output_scale, int output_zero_point, TfLiteConvParams* conv_params, - TfLiteRegistration registration, T* output_data, + TfLiteRegistration_V1 registration, T* output_data, TfLiteType tensor_weight_type = kTfLiteNoType) { TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data); @@ -194,7 +196,7 @@ TfLiteStatus TestConvQuantizedPerChannel( float* bias_scales, int* bias_zero_points, int* output_dims_data, const float* expected_output_data, int8_t* expected_output_data_quantized, float output_scale, int output_zero_point, TfLiteConvParams* conv_params, - TfLiteRegistration registration, int8_t* output_data, + TfLiteRegistration_V1 registration, int8_t* output_data, TfLiteType tensor_weight_type) { return TestConvQuantizedPerChannel( input_dims_data, input_data, input_quantized, input_scale, @@ -215,7 +217,7 @@ TfLiteStatus TestConvQuantizedPerChannel( int* bias_zero_points, int* output_dims_data, const float* expected_output_data, int16_t* expected_output_data_quantized, float output_scale, int 
output_zero_point, TfLiteConvParams* conv_params, - TfLiteRegistration registration, int16_t* output_data) { + TfLiteRegistration_V1 registration, int16_t* output_data) { return TestConvQuantizedPerChannel( input_dims_data, input_data, input_quantized, input_scale, input_zero_point, filter_dims_data, filter_data, filter_data_quantized, @@ -234,7 +236,7 @@ TfLiteStatus TestConvQuantizedPerChannel( float* bias_scales, int* bias_zero_points, int* output_dims_data, const float* expected_output_data, int16_t* expected_output_data_quantized, float output_scale, int output_zero_point, TfLiteConvParams* conv_params, - TfLiteRegistration registration, int16_t* output_data) { + TfLiteRegistration_V1 registration, int16_t* output_data) { return TestConvQuantizedPerChannel( input_dims_data, input_data, input_quantized, input_scale, input_zero_point, filter_dims_data, filter_data, filter_data_quantized, diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/cumsum.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/cumsum.cc index 4f8a96591..1b005e6a2 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/cumsum.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/cumsum.cc @@ -168,7 +168,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace -TfLiteRegistration Register_CUMSUM() { +TfLiteRegistration_V1 Register_CUMSUM() { return tflite::micro::RegisterOp(nullptr, Prepare, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/depth_to_space.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/depth_to_space.cc index 7f229fbf4..932e295c6 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/depth_to_space.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/depth_to_space.cc @@ -135,7 +135,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace -TfLiteRegistration Register_DEPTH_TO_SPACE() { +TfLiteRegistration_V1 Register_DEPTH_TO_SPACE() { return tflite::micro::RegisterOp(nullptr, Prepare, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/depthwise_conv.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/depthwise_conv.cc index e872d4ac6..50a902096 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/depthwise_conv.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/depthwise_conv.cc @@ -17,6 +17,7 @@ limitations under the License. 
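All of these test helpers now take the registration as a TfLiteRegistration_V1 by value; the call pattern in dependent tests is otherwise unchanged. A minimal sketch of the KernelRunner-based pattern these helpers wrap, assuming the usual utilities from test_helpers.h and micro_test.h and that tflite::Register_CONV_2D() is declared in conv.h (shapes and data here are illustrative, not taken from this diff):

#include "tensorflow/lite/micro/kernels/conv.h"
#include "tensorflow/lite/micro/kernels/kernel_runner.h"
#include "tensorflow/lite/micro/test_helpers.h"
#include "tensorflow/lite/micro/testing/micro_test.h"

TF_LITE_MICRO_TESTS_BEGIN

TF_LITE_MICRO_TEST(ConvRegistrationV1Sketch) {
  // 1x1 convolution with weight 2 over a 1x2x2x1 input: output = 2 * input.
  int input_dims_data[] = {4, 1, 2, 2, 1};
  int filter_dims_data[] = {4, 1, 1, 1, 1};
  int bias_dims_data[] = {1, 1};
  int output_dims_data[] = {4, 1, 2, 2, 1};
  const float input_data[] = {1, 2, 3, 4};
  const float filter_data[] = {2};
  const float bias_data[] = {0};
  float output_data[4];

  constexpr int tensors_size = 4;
  TfLiteTensor tensors[tensors_size] = {
      tflite::testing::CreateTensor(
          input_data, tflite::testing::IntArrayFromInts(input_dims_data)),
      tflite::testing::CreateTensor(
          filter_data, tflite::testing::IntArrayFromInts(filter_dims_data)),
      tflite::testing::CreateTensor(
          bias_data, tflite::testing::IntArrayFromInts(bias_dims_data)),
      tflite::testing::CreateTensor(
          output_data, tflite::testing::IntArrayFromInts(output_dims_data)),
  };

  int inputs_array_data[] = {3, 0, 1, 2};
  int outputs_array_data[] = {1, 3};

  TfLiteConvParams conv_params = {};
  conv_params.padding = kTfLitePaddingValid;
  conv_params.activation = kTfLiteActNone;
  conv_params.stride_height = 1;
  conv_params.stride_width = 1;
  conv_params.dilation_height_factor = 1;
  conv_params.dilation_width_factor = 1;

  // The only change for callers: the registration is a TfLiteRegistration_V1.
  const TfLiteRegistration_V1 registration = tflite::Register_CONV_2D();
  tflite::micro::KernelRunner runner(
      registration, tensors, tensors_size,
      tflite::testing::IntArrayFromInts(inputs_array_data),
      tflite::testing::IntArrayFromInts(outputs_array_data),
      reinterpret_cast<void*>(&conv_params));

  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare());
  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke());
}

TF_LITE_MICRO_TESTS_END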
#include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" #include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h" #include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h" #include "tensorflow/lite/kernels/kernel_util.h" @@ -66,31 +67,34 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } case kTfLiteInt8: { switch (filter->type) { - case kTfLiteInt8: { + case kTfLiteInt4: { + int8_t* unpacked_filter_data = static_cast( + context->GetScratchBuffer(context, data.filter_buffer_index)); + tflite::tensor_utils::UnpackDenseInt4IntoInt8( + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(filter).FlatSize(), + unpacked_filter_data); reference_integer_ops::DepthwiseConvPerChannel( DepthwiseConvParamsQuantized(params, data), data.per_channel_output_multiplier, data.per_channel_output_shift, tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(filter), - tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(filter), unpacked_filter_data, tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output)); break; } - case kTfLiteInt4: { - int8_t* unpacked_filter_data = static_cast( - context->GetScratchBuffer(context, data.filter_buffer_index)); - reference_integer_ops::DepthwiseConvPerChannelWithPackedInt4Weights( + case kTfLiteInt8: { + reference_integer_ops::DepthwiseConvPerChannel( DepthwiseConvParamsQuantized(params, data), data.per_channel_output_multiplier, data.per_channel_output_shift, tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), tflite::micro::GetTensorData(filter), - unpacked_filter_data, tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output)); @@ -113,7 +117,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace -TfLiteRegistration Register_DEPTHWISE_CONV_2D() { +TfLiteRegistration_V1 Register_DEPTHWISE_CONV_2D() { return tflite::micro::RegisterOp(Init, DepthwiseConvPrepare, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/depthwise_conv.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/depthwise_conv.h index 562438d7c..8bd5e4640 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/depthwise_conv.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/depthwise_conv.h @@ -49,28 +49,28 @@ TfLiteStatus CalculateOpDataDepthwiseConv( TfLiteStatus DepthwiseConvPrepare(TfLiteContext* context, TfLiteNode* node); -// This is the most generic TfLiteRegistration. The actual supported types may -// still be target dependent. The only requirement is that every implementation -// (reference or optimized) must define this function. -TfLiteRegistration Register_DEPTHWISE_CONV_2D(); +// This is the most generic TfLiteRegistration_V1. The actual supported types +// may still be target dependent. The only requirement is that every +// implementation (reference or optimized) must define this function. 
+TfLiteRegistration_V1 Register_DEPTHWISE_CONV_2D(); #if defined(CMSIS_NN) -// Returns a TfLiteRegistration struct for kernel variant that only supports +// Returns a TfLiteRegistration_V1 struct for kernel variant that only supports // int8 activations and int8 weights and uses the latency optimized // implementations. -TfLiteRegistration Register_DEPTHWISE_CONV_2D_INT8(); +TfLiteRegistration_V1 Register_DEPTHWISE_CONV_2D_INT8(); -// Returns a TfLiteRegistration struct for kernel variant that only supports +// Returns a TfLiteRegistration_V1 struct for kernel variant that only supports // int16 activations and int8 weights and uses the latency optimized // implementations. -TfLiteRegistration Register_DEPTHWISE_CONV_2D_INT16(); +TfLiteRegistration_V1 Register_DEPTHWISE_CONV_2D_INT16(); #else -inline TfLiteRegistration Register_DEPTHWISE_CONV_2D_INT8() { +inline TfLiteRegistration_V1 Register_DEPTHWISE_CONV_2D_INT8() { return Register_DEPTHWISE_CONV_2D(); } -inline TfLiteRegistration Register_DEPTHWISE_CONV_2D_INT16() { +inline TfLiteRegistration_V1 Register_DEPTHWISE_CONV_2D_INT16() { return Register_DEPTHWISE_CONV_2D(); } #endif diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/depthwise_conv_common.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/depthwise_conv_common.cc index 2a0ae2f4c..6d5f6c271 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/depthwise_conv_common.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/depthwise_conv_common.cc @@ -188,6 +188,13 @@ TfLiteStatus DepthwiseConvPrepare(TfLiteContext* context, TfLiteNode* node) { affine_quantization->zero_point->size); } + TF_LITE_ENSURE_MSG( + context, + input->type == filter->type || + (input->type == kTfLiteInt8 && + (filter->type == kTfLiteInt4 || filter->type == kTfLiteInt8)), + "Hybrid models are not supported on TFLite Micro."); + if (filter->type == kTfLiteInt4) { int filter_size = RuntimeShape(filter->dims->size, diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/depthwise_conv_test.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/depthwise_conv_test.cc index e39f33d5e..3ab3e5879 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/depthwise_conv_test.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/depthwise_conv_test.cc @@ -1,3 +1,4 @@ + /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); @@ -24,15 +25,13 @@ namespace tflite { namespace testing { namespace { -#if !defined(XTENSA) // Needed to avoid build errors from unused variables. -constexpr int kMaxFilterChannels = 64; -constexpr int kMaxBiasChannels = 64; -#endif // !defined(XTENSA) - // Index of the output tensor in context->tensors, specific to // DepthwiseConv. constexpr int kOutputTensorIndex = 3; +constexpr int kMaxFilterChannels = 64; +constexpr int kMaxBiasChannels = 64; + // Creates a DepthwiseConv opeerator, calls it with the provided input tensors // and some defaults parameters, and compares the output with // expected_output_data. 
@@ -49,7 +48,7 @@ TfLiteStatus ValidateDepthwiseConvGoldens( int outputs_array_data[] = {1, 3}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - const TfLiteRegistration registration = Register_DEPTHWISE_CONV_2D(); + const TfLiteRegistration_V1 registration = Register_DEPTHWISE_CONV_2D(); micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, outputs_array, reinterpret_cast(conv_params)); @@ -79,34 +78,6 @@ TfLiteStatus ValidateDepthwiseConvGoldens( return kTfLiteOk; } -#if !defined(XTENSA) // Needed to avoid build errors from unsused functions. -void TestDepthwiseConvFloat(int* input_dims_data, const float* input_data, - int* filter_dims_data, const float* filter_data, - int* bias_dims_data, const float* bias_data, - const float* expected_output_data, - int* output_dims_data, - TfLiteDepthwiseConvParams* conv_params, - float* output_data) { - TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); - TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data); - TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); - TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); - const int output_dims_count = ElementCount(*output_dims); - - constexpr int inputs_size = 3; - constexpr int outputs_size = 1; - constexpr int tensors_size = inputs_size + outputs_size; - TfLiteTensor tensors[tensors_size] = { - CreateTensor(input_data, input_dims), - CreateTensor(filter_data, filter_dims), - CreateTensor(bias_data, bias_dims), - CreateTensor(output_data, output_dims), - }; - - ValidateDepthwiseConvGoldens(expected_output_data, output_dims_count, - conv_params, 1e-5, tensors_size, tensors); -} - void TestDepthwiseConvQuantizedPerChannel( int* input_dims_data, const float* input_data, int8_t* input_quantized, float input_scale, int input_zero_point, int* filter_dims_data, @@ -176,6 +147,38 @@ void TestDepthwiseConvQuantizedPerChannel( 1.0, tensors_size, tensors)); } +// Xtensa kernels do not support float activations, and the corresponding tests +// are disabled. As a result, helper functions that are only needed for float +// kernel tests also need to be ifdef'd out to avoid build errors due to unused +// functions.
+#if !defined(XTENSA) +void TestDepthwiseConvFloat(int* input_dims_data, const float* input_data, + int* filter_dims_data, const float* filter_data, + int* bias_dims_data, const float* bias_data, + const float* expected_output_data, + int* output_dims_data, + TfLiteDepthwiseConvParams* conv_params, + float* output_data) { + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data); + TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); + const int output_dims_count = ElementCount(*output_dims); + + constexpr int inputs_size = 3; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + CreateTensor(input_data, input_dims), + CreateTensor(filter_data, filter_dims), + CreateTensor(bias_data, bias_dims), + CreateTensor(output_data, output_dims), + }; + + ValidateDepthwiseConvGoldens(expected_output_data, output_dims_count, + conv_params, 1e-5, tensors_size, tensors); +} + #endif // !defined(XTENSA) } // namespace @@ -239,49 +242,6 @@ TF_LITE_MICRO_TEST(SimpleTestRelu) { bias_values, golden_relu, output_shape, &conv_params, output_data); } -TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannel) { - const int input_elements = 12; - int input_shape[] = {4, 1, 3, 2, 2}; - const float input_values[] = {1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}; - const int filter_elements = 16; - int filter_shape[] = {4, 1, 2, 2, 4}; - const float filter_values[] = {1, 2, 3, 4, -9, 10, -11, 12, - 5, 6, 7, 8, 13, -14, 15, -16}; - const int bias_elements = 4; - int bias_shape[] = {4, 1, 1, 1, 4}; - const int output_elements = 8; - const float bias_values[] = {1, 2, 3, 4}; - const float golden[] = { - 71, -34, 99, -20, 91, -26, 127, -4, - }; - int output_shape[] = {4, 1, 2, 1, 4}; - const int output_dims_count = 8; - int8_t output_data[output_dims_count]; - - const float input_scale = 0.5; - const float output_scale = 1.0f; - const int input_zero_point = 0; - const int output_zero_point = 0; - - int8_t input_quantized[input_elements]; - int8_t filter_quantized[filter_elements]; - int32_t bias_quantized[bias_elements]; - int8_t golden_quantized[output_elements]; - - TfLiteDepthwiseConvParams conv_params; - conv_params.activation = kTfLiteActNone; - conv_params.dilation_width_factor = 1; - conv_params.dilation_height_factor = 1; - conv_params.stride_height = 1; - conv_params.stride_width = 1; - - tflite::testing::TestDepthwiseConvQuantizedPerChannel( - input_shape, input_values, input_quantized, input_scale, input_zero_point, - filter_shape, filter_values, filter_quantized, bias_shape, bias_values, - bias_quantized, output_shape, golden, golden_quantized, output_data, - output_scale, output_zero_point, &conv_params); -} - TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannelDepthMultiplier1) { const int input_elements = 12; int input_shape[] = {4, 1, 3, 2, 2}; @@ -460,54 +420,6 @@ TF_LITE_MICRO_TEST(TestQuantizedPerChannelCompareWithFloat) { golden, output_dims, &conv_params, output_float); } -// Quantizing int8-ranged filter values down to int4 doesn't always yield the -// accuracy sufficient to meet the golden values. So this test was created by -// handcrafting filter values within the int4 range, and the golden data was -// obtained by running TestDepthwiseConvQuantizedPerChannel() with int8 -// quantization, and ensuring that int4 quantization yields the same outputs. 
-TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannelInt4Filter) { - const int input_elements = 12; - int input_shape[] = {4, 1, 3, 2, 2}; - const float input_values[] = {1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}; - const int filter_elements = 16; - int filter_shape[] = {4, 1, 2, 2, 4}; - const float filter_values[] = {1, 2, 3, 4, -5, 7, -6, 7, - 5, 6, 7, 4, 2, -5, 4, 0}; - const int bias_elements = 4; - int bias_shape[] = {4, 1, 1, 1, 4}; - const int output_elements = 8; - const float bias_values[] = {1, 2, 3, 4}; - const float golden[] = { - 0, 26, 29, 84, 6, 46, 45, 114, - }; - int output_shape[] = {4, 1, 2, 1, 4}; - const int output_dims_count = 8; - int8_t output_data[output_dims_count]; - - const float input_scale = 0.5; - const float output_scale = 1.0f; - const int input_zero_point = 0; - const int output_zero_point = 0; - - int8_t input_quantized[input_elements]; - int8_t filter_quantized[filter_elements]; - int32_t bias_quantized[bias_elements]; - int8_t golden_quantized[output_elements]; - - TfLiteDepthwiseConvParams conv_params; - conv_params.activation = kTfLiteActNone; - conv_params.dilation_width_factor = 1; - conv_params.dilation_height_factor = 1; - conv_params.stride_height = 1; - conv_params.stride_width = 1; - - tflite::testing::TestDepthwiseConvQuantizedPerChannel( - input_shape, input_values, input_quantized, input_scale, input_zero_point, - filter_shape, filter_values, filter_quantized, bias_shape, bias_values, - bias_quantized, output_shape, golden, golden_quantized, output_data, - output_scale, output_zero_point, &conv_params, kTfLiteInt4); -} - TF_LITE_MICRO_TEST(PerChannelBroadcastQuantizationParams) { const float input_scale = 1.0f; const float filter_scale = 1.0f; @@ -983,4 +895,98 @@ TF_LITE_MICRO_TEST(Int8Input32x1Filter32x1ShouldMatchGolden) { kQuantizationTolerance, kTensorsSize, tensors)); } +// TODO(b/268384678): xtensa vision p6 kernels break +// this test, will if def till properly investigated. + +// Quantizing int8-ranged filter values down to int4 doesn't always yield the +// accuracy sufficient to meet the golden values. So this test was created by +// handcrafting filter values within the int4 range, and the golden data was +// obtained by running TestDepthwiseConvQuantizedPerChannel() with int8 +// quantization, and ensuring that int4 quantization yields the same outputs. 
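To make the reasoning in the comment above concrete: symmetric per-channel quantization to int4 only has integer levels in roughly [-7, 7] (versus [-127, 127] for int8), so a filter whose largest magnitude is 16, as in the original int8 test data, ends up with a step size above 2.0 and individual weights can move by a unit or more after round-tripping. A small standalone illustration, assuming the common symmetric convention scale = max_abs / max_level:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Quantize one value symmetrically into [qmin, qmax] and reconstruct it.
float RoundTrip(float value, float scale, int qmin, int qmax) {
  const int q = std::min(
      std::max(static_cast<int>(std::round(value / scale)), qmin), qmax);
  return q * scale;
}

int main() {
  // Values taken from the int8 filter used by SimpleTestQuantizedPerChannel.
  const float weights[] = {13.0f, -14.0f, 15.0f, -16.0f};
  const float max_abs = 16.0f;
  const float int8_scale = max_abs / 127.0f;  // ~0.126 per step
  const float int4_scale = max_abs / 7.0f;    // ~2.29 per step
  for (const float w : weights) {
    std::printf("w=%6.1f  int8 -> %6.2f   int4 -> %6.2f\n", w,
                RoundTrip(w, int8_scale, -127, 127),
                RoundTrip(w, int4_scale, -7, 7));
  }
  return 0;
}

Running this, the int8 reconstructions stay within one LSB of the original values while the int4 reconstructions drift by up to about one float unit, which is why the int4 test keeps its filter values inside the int4 range instead of reusing the int8 golden data.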
+TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannelInt4Filter) { + const int input_elements = 12; + int input_shape[] = {4, 1, 3, 2, 2}; + const float input_values[] = {1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}; + const int filter_elements = 16; + int filter_shape[] = {4, 1, 2, 2, 4}; + const float filter_values[] = {1, 2, 3, 4, -5, 7, -6, 7, + 5, 6, 7, 4, 2, -5, 4, 0}; + const int bias_elements = 4; + int bias_shape[] = {4, 1, 1, 1, 4}; + const int output_elements = 8; + const float bias_values[] = {1, 2, 3, 4}; + const float golden[] = { + 0, 26, 29, 84, 6, 46, 45, 114, + }; + int output_shape[] = {4, 1, 2, 1, 4}; + const int output_dims_count = 8; + int8_t output_data[output_dims_count]; + + const float input_scale = 0.5; + const float output_scale = 1.0f; + const int input_zero_point = 0; + const int output_zero_point = 0; + + int8_t input_quantized[input_elements]; + int8_t filter_quantized[filter_elements]; + int32_t bias_quantized[bias_elements]; + int8_t golden_quantized[output_elements]; + + TfLiteDepthwiseConvParams conv_params; + conv_params.activation = kTfLiteActNone; + conv_params.dilation_width_factor = 1; + conv_params.dilation_height_factor = 1; + conv_params.stride_height = 1; + conv_params.stride_width = 1; + + tflite::testing::TestDepthwiseConvQuantizedPerChannel( + input_shape, input_values, input_quantized, input_scale, input_zero_point, + filter_shape, filter_values, filter_quantized, bias_shape, bias_values, + bias_quantized, output_shape, golden, golden_quantized, output_data, + output_scale, output_zero_point, &conv_params, kTfLiteInt4); +} + +TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannel) { + const int input_elements = 12; + int input_shape[] = {4, 1, 3, 2, 2}; + const float input_values[] = {1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}; + const int filter_elements = 16; + int filter_shape[] = {4, 1, 2, 2, 4}; + const float filter_values[] = {1, 2, 3, 4, -9, 10, -11, 12, + 5, 6, 7, 8, 13, -14, 15, -16}; + const int bias_elements = 4; + int bias_shape[] = {4, 1, 1, 1, 4}; + const int output_elements = 8; + const float bias_values[] = {1, 2, 3, 4}; + const float golden[] = { + 71, -34, 99, -20, 91, -26, 127, -4, + }; + int output_shape[] = {4, 1, 2, 1, 4}; + const int output_dims_count = 8; + int8_t output_data[output_dims_count]; + + const float input_scale = 0.5; + const float output_scale = 1.0f; + const int input_zero_point = 0; + const int output_zero_point = 0; + + int8_t input_quantized[input_elements]; + int8_t filter_quantized[filter_elements]; + int32_t bias_quantized[bias_elements]; + int8_t golden_quantized[output_elements]; + + TfLiteDepthwiseConvParams conv_params; + conv_params.activation = kTfLiteActNone; + conv_params.dilation_width_factor = 1; + conv_params.dilation_height_factor = 1; + conv_params.stride_height = 1; + conv_params.stride_width = 1; + + tflite::testing::TestDepthwiseConvQuantizedPerChannel( + input_shape, input_values, input_quantized, input_scale, input_zero_point, + filter_shape, filter_values, filter_quantized, bias_shape, bias_values, + bias_quantized, output_shape, golden, golden_quantized, output_data, + output_scale, output_zero_point, &conv_params); +} + TF_LITE_MICRO_TESTS_END diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/dequantize.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/dequantize.cc index f51db508d..1a62176f1 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/dequantize.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/dequantize.cc @@ -80,7 +80,7 @@ 
TfLiteStatus DequantizeEval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -TfLiteRegistration Register_DEQUANTIZE() { +TfLiteRegistration_V1 Register_DEQUANTIZE() { return tflite::micro::RegisterOp(DequantizeInit, DequantizePrepare, DequantizeEval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/detection_postprocess.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/detection_postprocess.cc index 326d87b52..8b19ee70f 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/detection_postprocess.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/detection_postprocess.cc @@ -799,8 +799,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } } // namespace -TfLiteRegistration* Register_DETECTION_POSTPROCESS() { - static TfLiteRegistration r = tflite::micro::RegisterOp(Init, Prepare, Eval); +TfLiteRegistration_V1* Register_DETECTION_POSTPROCESS() { + static TfLiteRegistration_V1 r = + tflite::micro::RegisterOp(Init, Prepare, Eval); return &r; } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/div.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/div.cc index 5c9861269..8771ebc06 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/div.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/div.cc @@ -201,7 +201,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace -TfLiteRegistration Register_DIV() { +TfLiteRegistration_V1 Register_DIV() { return tflite::micro::RegisterOp(Init, Prepare, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/elementwise.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/elementwise.cc index 81b27039f..1f3b5ecb4 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/elementwise.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/elementwise.cc @@ -1,4 +1,4 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -25,9 +25,6 @@ limitations under the License. 
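Unlike the value-returning registrations elsewhere in this change, Register_DETECTION_POSTPROCESS() above still hands out a pointer to a function-local static, since custom ops are hooked up by pointer. A hedged sketch of how an application wires it into the op resolver; the op name string and the AddCustom(name, registration pointer) signature are the conventional ones and are not shown in this diff:

#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

namespace tflite {
// Matches the definition in detection_postprocess.cc above.
TfLiteRegistration_V1* Register_DETECTION_POSTPROCESS();
}  // namespace tflite

// Illustrative: an op count of 1 keeps the example small.
TfLiteStatus RegisterDetectionPostprocessOp(
    tflite::MicroMutableOpResolver<1>& op_resolver) {
  return op_resolver.AddCustom("TFLite_Detection_PostProcess",
                               tflite::Register_DETECTION_POSTPROCESS());
}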
#include "tensorflow/lite/micro/micro_utils.h" namespace tflite { -namespace ops { -namespace micro { -namespace elementwise { namespace { constexpr int kAbsNameId = 0; @@ -351,9 +348,11 @@ TfLiteStatus RsqrtEval(TfLiteContext* context, TfLiteNode* node) { context, node, [](float f) { return 1.f / std::sqrt(f); }, /*validate_input_func=*/nullptr, type); case kTfLiteInt8: - return EvalImplQuantized(context, node, - elementwise::RsqrtEvalQuantized, - elementwise::validate_input_func, type); + return EvalImplQuantized(context, node, RsqrtEvalQuantized, + validate_input_func, type); + case kTfLiteInt16: + return EvalImplQuantized(context, node, RsqrtEvalQuantized, + validate_input_func, type); default: MicroPrintf("Current data type %s is not supported.", @@ -371,60 +370,47 @@ TfLiteStatus LogicalNotEval(TfLiteContext* context, TfLiteNode* node) { } } // namespace -} // namespace elementwise -TfLiteRegistration Register_ABS() { +TfLiteRegistration_V1 Register_ABS() { return tflite::micro::RegisterOp( - elementwise::ElementWiseAbsRsqrtInit, - elementwise::PrepareAbsRsqrt, - elementwise::AbsEval); + ElementWiseAbsRsqrtInit, PrepareAbsRsqrt, + AbsEval); } -TfLiteRegistration Register_SIN() { +TfLiteRegistration_V1 Register_SIN() { return tflite::micro::RegisterOp( - nullptr, elementwise::GenericPrepare, - elementwise::SinEval); + nullptr, GenericPrepare, SinEval); } -TfLiteRegistration Register_COS() { +TfLiteRegistration_V1 Register_COS() { return tflite::micro::RegisterOp( - nullptr, elementwise::GenericPrepare, - elementwise::CosEval); + nullptr, GenericPrepare, CosEval); } -TfLiteRegistration Register_LOG() { +TfLiteRegistration_V1 Register_LOG() { return tflite::micro::RegisterOp( - nullptr, elementwise::GenericPrepare, - elementwise::LogEval); + nullptr, GenericPrepare, LogEval); } -TfLiteRegistration Register_SQRT() { +TfLiteRegistration_V1 Register_SQRT() { return tflite::micro::RegisterOp( - nullptr, elementwise::GenericPrepare, - elementwise::SqrtEval); + nullptr, GenericPrepare, SqrtEval); } -TfLiteRegistration Register_RSQRT() { +TfLiteRegistration_V1 Register_RSQRT() { return tflite::micro::RegisterOp( - elementwise::ElementWiseAbsRsqrtInit, - elementwise::PrepareAbsRsqrt, - elementwise::RsqrtEval); + ElementWiseAbsRsqrtInit, + PrepareAbsRsqrt, RsqrtEval); } -TfLiteRegistration Register_SQUARE() { +TfLiteRegistration_V1 Register_SQUARE() { return tflite::micro::RegisterOp( - nullptr, elementwise::GenericPrepare, - elementwise::SquareEval); + nullptr, GenericPrepare, SquareEval); } -TfLiteRegistration Register_LOGICAL_NOT() { +TfLiteRegistration_V1 Register_LOGICAL_NOT() { return tflite::micro::RegisterOp( - nullptr, elementwise::GenericPrepare, - elementwise::LogicalNotEval); + nullptr, GenericPrepare, LogicalNotEval); } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/elu.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/elu.cc index c4786d6fc..482baed26 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/elu.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/elu.cc @@ -144,7 +144,7 @@ TfLiteStatus EluEval(TfLiteContext* context, TfLiteNode* node) { } // namespace -TfLiteRegistration Register_ELU() { +TfLiteRegistration_V1 Register_ELU() { return tflite::micro::RegisterOp(EluInit, EluPrepare, EluEval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/ethosu.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/ethosu.cc index 
c305121e8..1b792fb84 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/ethosu.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/ethosu.cc @@ -20,7 +20,7 @@ limitations under the License. namespace tflite { -TfLiteRegistration* Register_ETHOSU() { return nullptr; } +TfLiteRegistration_V1* Register_ETHOSU() { return nullptr; } const char* GetString_ETHOSU() { return ""; } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/ethosu.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/ethosu.h index cfbb0d3f7..93ef1d5b4 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/ethosu.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/ethosu.h @@ -19,7 +19,7 @@ limitations under the License. namespace tflite { -TfLiteRegistration* Register_ETHOSU(); +TfLiteRegistration_V1* Register_ETHOSU(); const char* GetString_ETHOSU(); diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/exp.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/exp.cc index a835ee0af..44a39f453 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/exp.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/exp.cc @@ -72,7 +72,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } } // namespace -TfLiteRegistration Register_EXP() { +TfLiteRegistration_V1 Register_EXP() { return tflite::micro::RegisterOp(nullptr, Prepare, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/expand_dims.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/expand_dims.cc index ad45dd882..4c98ef9d3 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/expand_dims.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/expand_dims.cc @@ -142,7 +142,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } } // namespace -TfLiteRegistration Register_EXPAND_DIMS() { +TfLiteRegistration_V1 Register_EXPAND_DIMS() { return tflite::micro::RegisterOp(nullptr, Prepare, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/fill.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/fill.cc index 6a3f4998e..a759a0fe5 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/fill.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/fill.cc @@ -133,7 +133,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace -TfLiteRegistration Register_FILL() { +TfLiteRegistration_V1 Register_FILL() { return tflite::micro::RegisterOp(nullptr, Prepare, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/floor.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/floor.cc index 6b2a4cc25..bf6404c39 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/floor.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/floor.cc @@ -1,4 +1,4 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -20,9 +20,8 @@ limitations under the License. 
#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { -namespace ops { -namespace micro { -namespace floor { + +namespace { constexpr int kInputTensor = 0; constexpr int kOutputTensor = 0; @@ -39,12 +38,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { tflite::micro::GetTensorData(output)); return kTfLiteOk; } -} // namespace floor -TfLiteRegistration Register_FLOOR() { - return tflite::micro::RegisterOp(nullptr, nullptr, floor::Eval); +} // namespace + +TfLiteRegistration_V1 Register_FLOOR() { + return tflite::micro::RegisterOp(nullptr, nullptr, Eval); } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/floor_div.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/floor_div.cc index f143d28af..d70080e73 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/floor_div.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/floor_div.cc @@ -123,7 +123,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace -TfLiteRegistration Register_FLOOR_DIV() { +TfLiteRegistration_V1 Register_FLOOR_DIV() { return tflite::micro::RegisterOp(Init, Prepare, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/floor_mod.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/floor_mod.cc index 939a4dd78..aa53b1577 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/floor_mod.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/floor_mod.cc @@ -121,7 +121,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace -TfLiteRegistration Register_FLOOR_MOD() { +TfLiteRegistration_V1 Register_FLOOR_MOD() { return tflite::micro::RegisterOp(Init, Prepare, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected.cc index a148ce448..66ab0a900 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected.cc @@ -17,6 +17,7 @@ limitations under the License. 
#include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" #include "tensorflow/lite/kernels/internal/reference/fully_connected.h" #include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h" #include "tensorflow/lite/micro/kernels/kernel_util.h" @@ -54,6 +55,16 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE(context, output != nullptr); TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); + if ((input->type == kTfLiteFloat32 && filter->type != kTfLiteFloat32) || + (input->type == kTfLiteInt8 && + (filter->type != kTfLiteInt8 && filter->type != kTfLiteInt4)) || + (input->type == kTfLiteInt16 && filter->type != kTfLiteInt8)) { + MicroPrintf("Input type: %s with filter type : %s not supported.", + TfLiteTypeGetName(input->type), + TfLiteTypeGetName(filter->type)); + return kTfLiteError; + } + if (filter->type == kTfLiteInt4) { int filter_size = RuntimeShape(filter->dims->size, @@ -113,29 +124,32 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { case kTfLiteInt8: { switch (filter->type) { - case kTfLiteInt8: { + case kTfLiteInt4: { + int8_t* unpacked_filter_data = static_cast( + context->GetScratchBuffer(context, data.filter_buffer_index)); + tflite::tensor_utils::UnpackDenseInt4IntoInt8( + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(filter).FlatSize(), + unpacked_filter_data); tflite::reference_integer_ops::FullyConnected( FullyConnectedParamsQuantized(data), tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(filter), - tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(filter), unpacked_filter_data, tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output)); break; } - case kTfLiteInt4: { - int8_t* unpacked_filter_data = static_cast( - context->GetScratchBuffer(context, data.filter_buffer_index)); - tflite::reference_integer_ops::FullyConnectedWithPackedInt4Weights( + case kTfLiteInt8: { + tflite::reference_integer_ops::FullyConnected( FullyConnectedParamsQuantized(data), tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), tflite::micro::GetTensorData(filter), - unpacked_filter_data, tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output)); @@ -185,7 +199,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace -TfLiteRegistration Register_FULLY_CONNECTED() { +TfLiteRegistration_V1 Register_FULLY_CONNECTED() { return tflite::micro::RegisterOp(Init, Prepare, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected.h index 2083c3fbb..f44465887 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected.h @@ -68,15 +68,15 @@ TfLiteStatus CalculateOpDataFullyConnected( TfLiteType data_type, const TfLiteTensor* input, const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output, OpDataFullyConnected* data); -// This is the most generic TfLiteRegistration. 
The actual supported types may -// still be target dependent. The only requirement is that every implementation -// (reference or optimized) must define this function. -TfLiteRegistration Register_FULLY_CONNECTED(); +// This is the most generic TfLiteRegistration_V1. The actual supported types +// may still be target dependent. The only requirement is that every +// implementation (reference or optimized) must define this function. +TfLiteRegistration_V1 Register_FULLY_CONNECTED(); -#if defined(CMSIS_NN) || defined(HEXAGON) -// Returns a TfLiteRegistration struct for kernel variant that only supports +#if defined(CMSIS_NN) || defined(HEXAGON) || defined(XTENSA) +// Returns a TfLiteRegistration_V1 struct for kernel variant that only supports // int8. -TfLiteRegistration Register_FULLY_CONNECTED_INT8(); +TfLiteRegistration_V1 Register_FULLY_CONNECTED_INT8(); #else // Note that while this block gets used for both reference and optimized kernels @@ -84,16 +84,16 @@ TfLiteRegistration Register_FULLY_CONNECTED_INT8(); // define fallback implementation that allow reference kernels to still be used // from applications that call a more specific kernel variant. -inline TfLiteRegistration Register_FULLY_CONNECTED_INT8() { +inline TfLiteRegistration_V1 Register_FULLY_CONNECTED_INT8() { return Register_FULLY_CONNECTED(); } #endif #if defined(CMSIS_NN) -// Returns a TfLiteRegistration struct for kernel variant that only supports +// Returns a TfLiteRegistration_V1 struct for kernel variant that only supports // int16. -TfLiteRegistration Register_FULLY_CONNECTED_INT16(); +TfLiteRegistration_V1 Register_FULLY_CONNECTED_INT16(); #else // Note that while this block gets used for both reference and optimized kernels @@ -101,7 +101,7 @@ TfLiteRegistration Register_FULLY_CONNECTED_INT16(); // define fallback implementation that allow reference kernels to still be used // from applications that call a more specific kernel variant. -inline TfLiteRegistration Register_FULLY_CONNECTED_INT16() { +inline TfLiteRegistration_V1 Register_FULLY_CONNECTED_INT16() { return Register_FULLY_CONNECTED(); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected_common.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected_common.cc index e7d0056c3..5a8d312de 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected_common.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected_common.cc @@ -64,12 +64,13 @@ TfLiteStatus CalculateOpDataFullyConnected( QuantizeMultiplier(real_multiplier, &data->output_multiplier, &data->output_shift); - data->input_zero_point = input->params.zero_point; // Filter weights will always be symmetric quantized since we only support // int8 quantization. See // https://github.com/tensorflow/tensorflow/issues/44912 for additional // context. 
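The XTENSA addition above means Xtensa builds now also provide a dedicated int8 fully-connected variant, while targets without an optimized implementation fall back to the generic registration through the inline shims. A sketch of how an application opts into the specialized variant, assuming MicroMutableOpResolver::AddFullyConnected accepts an explicit registration as in current tflite-micro:

#include "tensorflow/lite/micro/kernels/fully_connected.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

// On CMSIS-NN, Hexagon, or Xtensa builds this picks the latency-optimized
// int8 kernel; elsewhere the inline fallback above makes it identical to
// Register_FULLY_CONNECTED().
TfLiteStatus AddInt8FullyConnected(
    tflite::MicroMutableOpResolver<1>& op_resolver) {
  return op_resolver.AddFullyConnected(
      tflite::Register_FULLY_CONNECTED_INT8());
}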
TFLITE_DCHECK(filter->params.zero_point == 0); + + data->input_zero_point = input->params.zero_point; data->filter_zero_point = filter->params.zero_point; data->output_zero_point = output->params.zero_point; diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/gather.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/gather.cc index 4ec534731..9c8589578 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/gather.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/gather.cc @@ -217,7 +217,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } } // namespace -TfLiteRegistration Register_GATHER() { +TfLiteRegistration_V1 Register_GATHER() { return tflite::micro::RegisterOp(nullptr, Prepare, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/gather_nd.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/gather_nd.cc index 1f46dd1ef..27307d1ae 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/gather_nd.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/gather_nd.cc @@ -86,6 +86,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { // Assign to output the input type. output->type = params->type; + // The tensor output dims must be relocated + // from the FlatBuffer to the persistant storage arena. + TfLiteEvalTensor* output_eval = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + TF_LITE_ENSURE_OK(context, tflite::micro::CreateWritableTensorDimsWithCopy( + context, output, output_eval)); + // TFLM gather_nd does not create the output tensor, but it needs to ensure // that the output shape is correct. The result shape is // indices.shape[:-1] + params.shape[indices.shape[-1]:] @@ -198,7 +205,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } } // namespace -TfLiteRegistration Register_GATHER_ND() { +TfLiteRegistration_V1 Register_GATHER_ND() { return tflite::micro::RegisterOp(nullptr, Prepare, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/hard_swish.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/hard_swish.cc index a0b3f7c62..8e3a9cde4 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/hard_swish.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/hard_swish.cc @@ -67,7 +67,7 @@ TfLiteStatus HardSwishEval(TfLiteContext* context, TfLiteNode* node) { } // namespace -TfLiteRegistration Register_HARD_SWISH() { +TfLiteRegistration_V1 Register_HARD_SWISH() { return tflite::micro::RegisterOp(HardSwishInit, tflite::HardSwishPrepare, HardSwishEval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/if.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/if.cc index 39eca8b48..a23bfc532 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/if.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/if.cc @@ -114,7 +114,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace. 
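The gather_nd result-shape rule quoted above, result = indices.shape[:-1] + params.shape[indices.shape[-1]:], is easiest to see with concrete shapes. A small illustrative helper (std::vector is used here purely for readability; the kernel itself works on TfLiteIntArray):

#include <vector>

// Computes the gather_nd result shape described in the comment above.
std::vector<int> GatherNdResultShape(const std::vector<int>& params_shape,
                                     const std::vector<int>& indices_shape) {
  std::vector<int> result(indices_shape.begin(), indices_shape.end() - 1);
  const int index_depth = indices_shape.back();
  result.insert(result.end(), params_shape.begin() + index_depth,
                params_shape.end());
  return result;
}

// E.g. params [4, 3] gathered with indices of shape [2, 1] yields [2, 3]
// (two whole rows of the 4x3 params), while indices of shape [2, 2] yields
// [2] (two individual elements).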
-TfLiteRegistration Register_IF() { +TfLiteRegistration_V1 Register_IF() { return tflite::micro::RegisterOp(Init, Prepare, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/kernel_runner.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/kernel_runner.cc index 070f32a5a..6ec2e3509 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/kernel_runner.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/kernel_runner.cc @@ -34,7 +34,7 @@ void ClearBufferApi(TfLiteContext* context_) { context_->RequestScratchBufferInArena = nullptr; } -KernelRunner::KernelRunner(const TfLiteRegistration& registration, +KernelRunner::KernelRunner(const TfLiteRegistration_V1& registration, TfLiteTensor* tensors, int tensors_size, TfLiteIntArray* inputs, TfLiteIntArray* outputs, void* builtin_data, TfLiteIntArray* intermediates) @@ -94,7 +94,7 @@ TfLiteStatus KernelRunner::Invoke() { context_.GetScratchBuffer = MicroContextGetScratchBuffer; if (registration_.invoke == nullptr) { - MicroPrintf("TfLiteRegistration missing invoke function pointer!"); + MicroPrintf("TfLiteRegistration_V1 missing invoke function pointer!"); return kTfLiteError; } @@ -110,7 +110,7 @@ TfLiteStatus KernelRunner::Free() { context_.GetScratchBuffer = MicroContextGetScratchBuffer; if (registration_.free == nullptr) { - MicroPrintf("TfLiteRegistration missing free function pointer!"); + MicroPrintf("TfLiteRegistration_V1 missing free function pointer!"); return kTfLiteError; } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/kernel_runner.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/kernel_runner.h index c7d53c3a5..3eebf9d5e 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/kernel_runner.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/kernel_runner.h @@ -25,7 +25,7 @@ limitations under the License. namespace tflite { namespace micro { -// Helper class to perform a simulated kernel (i.e. TfLiteRegistration) +// Helper class to perform a simulated kernel (i.e. TfLiteRegistration_V1) // lifecycle (init, prepare, invoke). All internal allocations are handled by // this class. Simply pass in the registration, list of required tensors, inputs // array, outputs array, and any pre-builtin data. Calling Invoke() will @@ -33,22 +33,22 @@ namespace micro { // output provided during construction. class KernelRunner { public: - KernelRunner(const TfLiteRegistration& registration, TfLiteTensor* tensors, + KernelRunner(const TfLiteRegistration_V1& registration, TfLiteTensor* tensors, int tensors_size, TfLiteIntArray* inputs, TfLiteIntArray* outputs, void* builtin_data, TfLiteIntArray* intermediates = nullptr); - // Calls init and prepare on the kernel (i.e. TfLiteRegistration) struct. Any - // exceptions will be DebugLog'd and returned as a status code. + // Calls init and prepare on the kernel (i.e. TfLiteRegistration_V1) struct. + // Any exceptions will be DebugLog'd and returned as a status code. TfLiteStatus InitAndPrepare(const char* init_data = nullptr, size_t length = 0); - // Calls init, prepare, and invoke on a given TfLiteRegistration pointer. + // Calls init, prepare, and invoke on a given TfLiteRegistration_V1 pointer. // After successful invoke, results will be available in the output tensor as // passed into the constructor of this class. TfLiteStatus Invoke(); - // Calls Free on a given TfLiteRegistration pointer(if it's implemented). + // Calls Free on a given TfLiteRegistration_V1 pointer(if it's implemented). 
// After successful Free, kTfLiteOk status will be returned. If Free is not // implemented for a given kernel kTfLiteError will be returned. TfLiteStatus Free(); @@ -68,7 +68,7 @@ class KernelRunner { TfLiteContext context_ = {}; TfLiteNode node_ = {}; - const TfLiteRegistration& registration_; + const TfLiteRegistration_V1& registration_; SingleArenaBufferAllocator* allocator_; MockMicroGraph mock_micro_graph_; diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/kernel_util.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/kernel_util.cc index 0499260f5..6d7666721 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/kernel_util.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/kernel_util.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/lite/micro/kernels/kernel_util.h" #include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" #include "tensorflow/lite/micro/memory_helpers.h" #include "tensorflow/lite/micro/micro_log.h" @@ -37,7 +38,7 @@ int ValidateTensorIndexing(const TfLiteContext* context, int index, } // namespace -TfLiteRegistration RegisterOp( +TfLiteRegistration_V1 RegisterOp( void* (*init)(TfLiteContext* context, const char* buffer, size_t length), TfLiteStatus (*prepare)(TfLiteContext* context, TfLiteNode* node), TfLiteStatus (*invoke)(TfLiteContext* context, TfLiteNode* node), @@ -49,8 +50,7 @@ TfLiteRegistration RegisterOp( /*profiling_string=*/nullptr, /*builtin_code=*/0, /*custom_name=*/nullptr, - /*version=*/0, - /*registration_external=*/nullptr}; + /*version=*/0}; } // Returns a mutable tensor for a given input index. is_variable must be checked @@ -256,5 +256,24 @@ TfLiteStatus CopySubgraphOutputsToOpOutputs(TfLiteContext* context, return kTfLiteOk; } +TfLiteEvalTensor MakeUnpackedInt4Tensor(TfLiteContext* context, + int scratch_buffer_index, + const TfLiteEvalTensor* tensor) { + if (tensor->type != kTfLiteInt4) { + return *tensor; + } + + TfLiteEvalTensor new_tensor; + new_tensor.data.data = static_cast( + context->GetScratchBuffer(context, scratch_buffer_index)); + new_tensor.dims = tensor->dims; + new_tensor.type = kTfLiteInt8; + tflite::tensor_utils::UnpackDenseInt4IntoInt8( + tflite::micro::GetTensorData(tensor), + tflite::micro::GetTensorShape(tensor).FlatSize(), + tflite::micro::GetTensorData(&new_tensor)); + return new_tensor; +} + } // namespace micro } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/kernel_util.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/kernel_util.h index aa369605e..191ab2db4 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/kernel_util.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/kernel_util.h @@ -28,7 +28,7 @@ limitations under the License. namespace tflite { namespace micro { -TfLiteRegistration RegisterOp( +TfLiteRegistration_V1 RegisterOp( void* (*init)(TfLiteContext* context, const char* buffer, size_t length), TfLiteStatus (*prepare)(TfLiteContext* context, TfLiteNode* node), TfLiteStatus (*invoke)(TfLiteContext* context, TfLiteNode* node), @@ -131,6 +131,14 @@ TfLiteStatus CopySubgraphOutputsToOpOutputs(TfLiteContext* context, MicroGraph* graph_info, int subgraph_idx); +// If tensor is INT4, make a new TfLiteEvalTensor with data unpacked into +// a scratch buffer. The returned tensor will have the kTfLiteInt8 type. 
+// Assume scratch buffer is previously requested in Prepare, and +// scratch_buffer_index can be used to retrieve that buffer. +// If the tensor is not INT4, a shallow copy is returned. +TfLiteEvalTensor MakeUnpackedInt4Tensor(TfLiteContext* context, + int scratch_buffer_index, + const TfLiteEvalTensor* tensor); } // namespace micro } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/l2_pool_2d.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/l2_pool_2d.cc index d4225e466..794f2b679 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/l2_pool_2d.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/l2_pool_2d.cc @@ -135,7 +135,7 @@ TfLiteStatus L2Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace -TfLiteRegistration Register_L2_POOL_2D() { +TfLiteRegistration_V1 Register_L2_POOL_2D() { return tflite::micro::RegisterOp(nullptr, L2Prepare, L2Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/l2norm.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/l2norm.cc index 5adea8e29..6c6ff4ae3 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/l2norm.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/l2norm.cc @@ -1,4 +1,4 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -22,9 +22,6 @@ limitations under the License. #include "tensorflow/lite/micro/micro_log.h" namespace tflite { -namespace ops { -namespace micro { -namespace l2norm { namespace { @@ -37,8 +34,6 @@ enum KernelType { constexpr int kInputTensor = 0; constexpr int kOutputTensor = 0; -} // namespace - TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TFLITE_DCHECK(node->user_data != nullptr); TFLITE_DCHECK(node->builtin_data != nullptr); @@ -135,14 +130,14 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -} // namespace l2norm +} // namespace -TfLiteRegistration Register_L2NORM_REF() { - return tflite::micro::RegisterOp(l2norm::Init, l2norm::Prepare, l2norm::Eval); +TfLiteRegistration_V1 Register_L2NORM_REF() { + return tflite::micro::RegisterOp(Init, Prepare, Eval); } -TfLiteRegistration Register_L2_NORMALIZATION() { return Register_L2NORM_REF(); } +TfLiteRegistration_V1 Register_L2_NORMALIZATION() { + return Register_L2NORM_REF(); +} -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/leaky_relu.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/leaky_relu.cc index 7b51ebcb3..1873e3ccd 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/leaky_relu.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/leaky_relu.cc @@ -87,7 +87,7 @@ TfLiteStatus LeakyReluEval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteError; } -TfLiteRegistration Register_LEAKY_RELU() { +TfLiteRegistration_V1 Register_LEAKY_RELU() { return tflite::micro::RegisterOp(LeakyReluInit, LeakyReluPrepare, LeakyReluEval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/leaky_relu_common.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/leaky_relu_common.cc index 7d3cb176f..3d1ffebb6 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/leaky_relu_common.cc +++ 
b/third_party/tflite-micro/tensorflow/lite/micro/kernels/leaky_relu_common.cc @@ -51,16 +51,15 @@ TfLiteStatus CalculateOpDataLeakyRelu(TfLiteContext* context, data->output_zero_point = output->params.zero_point; int output_shift_alpha; - double alpha_multiplier = static_cast(input->params.scale) * - static_cast(params->alpha) / - static_cast(output->params.scale); + double alpha_multiplier = static_cast( + input->params.scale * params->alpha / output->params.scale); QuantizeMultiplier(alpha_multiplier, &data->output_multiplier_alpha, &output_shift_alpha); data->output_shift_alpha = static_cast(output_shift_alpha); int output_shift_identity; - double identity_multiplier = static_cast(input->params.scale) / - static_cast(output->params.scale); + double identity_multiplier = + static_cast(input->params.scale / output->params.scale); QuantizeMultiplier(identity_multiplier, &data->output_multiplier_identity, &output_shift_identity); data->output_shift_identity = static_cast(output_shift_identity); diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/log_softmax.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/log_softmax.cc index 0b1838c30..1ce04c650 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/log_softmax.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/log_softmax.cc @@ -141,7 +141,7 @@ TfLiteStatus LogSoftmaxEval(TfLiteContext* context, TfLiteNode* node) { } // namespace -TfLiteRegistration Register_LOG_SOFTMAX() { +TfLiteRegistration_V1 Register_LOG_SOFTMAX() { return tflite::micro::RegisterOp(nullptr, LogSoftmaxPrepare, LogSoftmaxEval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/logical.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/logical.cc index c85e0c5be..415c85c57 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/logical.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/logical.cc @@ -33,11 +33,11 @@ TfLiteStatus LogicalAndEval(TfLiteContext* context, TfLiteNode* node) { } // namespace -TfLiteRegistration Register_LOGICAL_OR() { +TfLiteRegistration_V1 Register_LOGICAL_OR() { return tflite::micro::RegisterOp(nullptr, nullptr, LogicalOrEval); } -TfLiteRegistration Register_LOGICAL_AND() { +TfLiteRegistration_V1 Register_LOGICAL_AND() { return tflite::micro::RegisterOp(nullptr, nullptr, LogicalAndEval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/logistic.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/logistic.cc index 108206ad3..f968771cc 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/logistic.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/logistic.cc @@ -105,7 +105,7 @@ TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) { } // namespace -TfLiteRegistration Register_LOGISTIC() { +TfLiteRegistration_V1 Register_LOGISTIC() { return tflite::micro::RegisterOp(LogisticInit, LogisticPrepare, LogisticEval); } } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval.cc index 555ecd724..93d6bc7e4 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval.cc @@ -1,4 +1,4 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,1446 +14,282 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/lite/micro/kernels/lstm_eval.h" -#include -#include -#include -#include +#include -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" +#include "tensorflow/lite/kernels/internal/reference/fully_connected.h" +#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h" #include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h" +#include "tensorflow/lite/kernels/internal/reference/integer_ops/mul.h" #include "tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/op_macros.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/micro_tensor_utils.h" +#include "tensorflow/lite/kernels/internal/reference/logistic.h" +#include "tensorflow/lite/kernels/internal/reference/mul.h" +#include "tensorflow/lite/kernels/internal/reference/tanh.h" +#include "tensorflow/lite/kernels/internal/types.h" namespace tflite { -namespace lstm_internal { -// Calculates a single LSTM gate. -// -// Implements the following formula: (* is matrix multiply) -// gate = activate(W_input * input + W_aux * aux_input + -// W_peephole * cell + W_recurrent * prev_output + bias) -// with layer norm: -// gate = activate(W_norm * normalize(...) + bias) // not adding bias inside -// -// Activation is sigmoid except for the "cell" gate (configurable, usually tanh) -// -// Parameters: -// Input vectors (to LSTM): | Size: | Optional? -// input | n_input | -// aux_input | n_aux_input | y (bidir LSTM) -// Input vectors (persistent states): -// output_state | n_output | -// cell_state | n_cell | -// 'Constant' inputs: -// input_to_gate_weights | n_cell * n_input | -// aux_input_to_gate_weights | n_cell * n_aux_input | y (bidir LSTM) -// recurrent_to_gate_weights | n_cell * n_output | -// cell_to_gate_weights | n_cell | y (peephole) -// gate_bias | n_cell | -// layer_norm_coefficients | n_cell | y (layer norm) -// Output vector: -// gate | n_cell | -// Scalar parameters: -// n_batch - batch size / number of vectors -// n_input, n_aux_input, n_output, n_cell - size of vectors. -// activation - activation to use. -// is_input_all_zeros, is_aux_input_all_zeros - if input vectors are all zero. -// use_layer_norm - if doing layer norm LSTM. 
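The comment block above describes the per-gate formula: gate = activate(W_input * input + W_aux * aux_input + W_peephole * cell + W_recurrent * prev_output + bias), with an optional layer-norm step before the activation. A minimal standalone sketch of that computation (single batch, sigmoid activation, no aux input, peephole, or layer norm; the helper name and the row-major [n_cell x n_input] / [n_cell x n_output] weight layout are illustrative assumptions, not the kernel's actual code):

#include <cmath>

// Sketch only: one LSTM gate for a single batch.
void LstmGateFloatSketch(const float* input, int n_input,
                         const float* output_state, int n_output,
                         const float* input_to_gate_weights,      // [n_cell x n_input]
                         const float* recurrent_to_gate_weights,  // [n_cell x n_output]
                         const float* gate_bias,                  // [n_cell]
                         int n_cell, float* gate) {
  for (int c = 0; c < n_cell; ++c) {
    float acc = gate_bias[c];
    for (int i = 0; i < n_input; ++i) {
      acc += input_to_gate_weights[c * n_input + i] * input[i];
    }
    for (int o = 0; o < n_output; ++o) {
      acc += recurrent_to_gate_weights[c * n_output + o] * output_state[o];
    }
    gate[c] = 1.0f / (1.0f + std::exp(-acc));  // sigmoid activation
  }
}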
-void CalculateLstmGateFloat( - const float* input, const float* input_to_gate_weights, - const float* aux_input, const float* aux_input_to_gate_weights, - const float* output_state, const float* recurrent_to_gate_weights, - const float* cell_state, const float* cell_to_gate_weights, - const float* layer_norm_coefficients, const float* gate_bias, - const int n_batch, const int n_input, const int n_aux_input, - const int n_output, const int n_cell, - const TfLiteFusedActivation activation, float* gate, - const bool is_input_all_zeros, const bool is_aux_input_all_zeros) { - const bool use_peephole = (cell_to_gate_weights != nullptr); - const bool use_layer_norm = (layer_norm_coefficients != nullptr); - // Initialize scratch buffers with bias for regular lstm or initialize with - // zero for layer norm lstm. - if (use_layer_norm) { - memset(gate, 0, n_cell * n_batch * sizeof(float)); - } else { - tflite::tensor_utils::VectorBatchVectorAssign(gate_bias, n_cell, n_batch, - gate); - } - // For each batch and cell: compute input_weight * input. - // Skip if input is all zeros. - if (!is_input_all_zeros) { - tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_to_gate_weights, n_cell, n_input, input, n_batch, gate); - } - // For each batch and cell: compute aux_input_weight * aux_input. - // Skip if auxiliary input is not available or all zeros. - if (!is_aux_input_all_zeros) { - tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate( - aux_input_to_gate_weights, n_cell, n_aux_input, aux_input, n_batch, - gate); - } - // For each batch and cell: compute recurrent_weight * output_state. - tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate( - recurrent_to_gate_weights, n_cell, n_output, output_state, n_batch, gate); - // For each batch and cell: compute cell_weight .* cell_state (peephole LSTM) - if (use_peephole) { - tflite::tensor_utils::VectorBatchVectorCwiseProductAccumulate( - cell_to_gate_weights, n_cell, cell_state, n_batch, gate); +LstmTensors::LstmTensors(TfLiteContext* context, TfLiteNode* node) { + micro_context_ = GetMicroContext(context); + // 24 internal tensors. see lstm_shared.h for tensor names + for (size_t i = 0; i < 24; i++) { + internal_tensors_[i] = micro_context_->AllocateTempInputTensor(node, i); } - // Do layer normalization (if layer norm LSTM) - if (use_layer_norm) { - tflite::tensor_utils::MeanStddevNormalization(gate, gate, n_cell, n_batch); - tflite::tensor_utils::VectorBatchVectorCwiseProduct( - layer_norm_coefficients, n_cell, gate, n_batch, gate); - tflite::tensor_utils::VectorBatchVectorAdd(gate_bias, n_cell, n_batch, - gate); - } - // Apply activation - tflite::PortableApplyActivationToVector(gate, n_batch * n_cell, activation, - gate); + output_tensor_ = + micro_context_->AllocateTempOutputTensor(node, kLstmOutputTensor); } -// Updates the LSTM cell state, used by both float and hybrid LSTM versions. -// -// Implements the following formula: -// cell_state_new = clip(forget_gate * cell_state + input_gate * cell_gate) -// -// With CIFG LSTM, input gate is replaced by (1-forget_gate). -// -// Parameters: -// - n_batch, n_cell: sizes of vectors -// - cell_state: input/output vector, size n_batch*n_cell -// - input_gate: input vector, size n_batch*n_cell. -// - forget_gate: input/scratch vector, size n_batch*n_cell, modified with CIFG -// - cell_gate: input vector, size n_batch*n_cell. -// - use_cifg: use 1-forget_gate instead of input_gate. -// - clip: if > 0, clip the resulting cell state to [-clip, +clip]. 
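A standalone sketch of the cell-state update described in the comment above, cell_state_new = clip(forget_gate * cell_state + input_gate * cell_gate), where the CIFG variant derives the input gate as 1 - forget_gate (helper name is illustrative; clip <= 0 means no clipping, matching the convention in the comment):

#include <algorithm>

// Sketch only: elementwise cell-state update for one batch of n_cell values.
void UpdateCellSketch(int n_cell, float* cell_state, const float* input_gate,
                      const float* forget_gate, const float* cell_gate,
                      bool use_cifg, float clip) {
  for (int c = 0; c < n_cell; ++c) {
    const float in_gate = use_cifg ? (1.0f - forget_gate[c]) : input_gate[c];
    float updated = forget_gate[c] * cell_state[c] + in_gate * cell_gate[c];
    if (clip > 0.0f) {
      updated = std::max(-clip, std::min(clip, updated));
    }
    cell_state[c] = updated;
  }
}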
-void UpdateLstmCellFloat(int n_batch, int n_cell, float* cell_state, - const float* input_gate, float* forget_gate, - const float* cell_gate, bool use_cifg, float clip) { - tflite::tensor_utils::VectorVectorCwiseProduct(forget_gate, cell_state, - n_batch * n_cell, cell_state); - - if (use_cifg) { - // With CIFG, input_gate = 1-forget_gate. Use the forget_gate array as - // scratch, as input_gate array is not allocated in this case. (Be careful - // not to write to the scratch before reading the forget gate data.) - float* scratch = forget_gate; - tflite::tensor_utils::Sub1Vector(forget_gate, n_batch * n_cell, scratch); - tflite::tensor_utils::VectorVectorCwiseProductAccumulate( - cell_gate, scratch, n_batch * n_cell, cell_state); - } else { - tflite::tensor_utils::VectorVectorCwiseProductAccumulate( - cell_gate, input_gate, n_batch * n_cell, cell_state); - } - if (clip > 0.0f) { - tflite::tensor_utils::CwiseClipping(cell_state, n_batch * n_cell, clip); - } -} - -// Calculates the output state tensor of an LSTM step. -// -// Implements the following formula: -// output_no_projection = output_gate .* activate(cell_state) -// (elementwise vector product) -// If no projection is used: -// output = output_state = output_no_projection -// With projection: -// output = output_state = clip(W*output_no_projection + bias) -// -// Output might not have a different 'stride' than n_batch, so we need to copy. -// -// Parameters: -// - n_batch: batches: the number of distinct vectors in each array. -// - n_cell, n_output: sizes of vectors. -// - cell_state, output_gate: input vectors, size n_batch*n_cell. -// - projection_weights, projection_weights_scale, projection_bias: -// constant inputs, describing projection matrix and bias. -// - proj_clip: if > 0, clip the output of the projection. -// - output_state: output vector, size n_batch*n_output. Must be contigous. -// - scratch: scratch area, size n_batch*n_cell. -void CalculateLstmOutputFloat(int n_batch, int n_cell, int n_output, - const float* cell_state, const float* output_gate, - TfLiteFusedActivation activation, - const float* projection_weights, - const float* projection_bias, - const float proj_clip, float* output_state, - float* scratch) { - tflite::PortableApplyActivationToVector(cell_state, n_batch * n_cell, - activation, scratch); - tflite::tensor_utils::VectorVectorCwiseProduct(output_gate, scratch, - n_batch * n_cell, scratch); - - const bool use_projection = (projection_weights != nullptr); - const bool use_projection_bias = (projection_bias != nullptr); - - if (use_projection) { - if (use_projection_bias) { - tflite::tensor_utils::VectorBatchVectorAssign(projection_bias, n_output, - n_batch, output_state); - } else { - memset(output_state, 0, n_batch * n_output * sizeof(float)); +LstmTensors::~LstmTensors() { + for (size_t i = 0; i < 24; i++) { + if (internal_tensors_[i] != nullptr) { + micro_context_->DeallocateTempTfLiteTensor(internal_tensors_[i]); } - tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate( - projection_weights, n_output, n_cell, scratch, n_batch, output_state); - if (proj_clip > 0.0f) { - tflite::tensor_utils::CwiseClipping(output_state, n_batch * n_output, - proj_clip); - } - } else { - std::memcpy(output_state, scratch, n_batch * n_output * sizeof(float)); } + micro_context_->DeallocateTempTfLiteTensor(output_tensor_); } -// Calculates a single LSTM gate, int8x8_16 version. -// Implements the same functionality as CalculateLstmGateFloat. 
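Returning to the output-state formula documented earlier in this hunk for CalculateLstmOutputFloat, output = output_gate .* activate(cell_state), optionally followed by a clipped projection: the no-projection case reduces to the sketch below (tanh activation assumed; helper name is illustrative; with no projection, n_output equals n_cell).

#include <cmath>

// Sketch only: no-projection output state for one batch of n_cell values.
void LstmOutputSketch(int n_cell, const float* cell_state,
                      const float* output_gate, float* output_state) {
  for (int c = 0; c < n_cell; ++c) {
    output_state[c] = output_gate[c] * std::tanh(cell_state[c]);
  }
}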
-void CalculateLstmGateInteger8x8_16( - // Input and weights - const int8_t* input, const int8_t* input_to_gate_weights, - const int32_t* input_to_gate_bias, const int32_t input_to_gate_scale_a, - const int32_t input_to_gate_scale_b, - // Output state and weights - const int8_t* output_state, const int8_t* recurrent_to_gate_weights, - const int32_t* recurrent_to_gate_bias, - const int32_t recurrent_to_gate_scale_a, - const int32_t recurrent_to_gate_scale_b, - // Cell state and weights - const int16_t* cell_state, const int16_t* cell_to_gate_weights, - const int32_t cell_to_gate_scale_a, const int32_t cell_to_gate_scale_b, - // Layer normalization parameters (layer norm LSTM) - const int16_t* layer_norm_coefficients, const int32_t* layer_norm_bias, - const int32_t layer_norm_input_scale_a, - const int32_t layer_norm_input_scale_b, - const int32_t layer_norm_variance_guard, - // Array sizes - const int n_batch, const int n_input, const int n_output, const int n_cell, - const TfLiteFusedActivation activation, - // Output - int16_t* gate, - // Parameters for performance optimizations - // Scratch arrays - int32_t* scratch5) { - const bool use_peephole = (cell_to_gate_weights != nullptr); - const bool use_layer_norm = (layer_norm_coefficients != nullptr); - - // Initialize scratch buffers with zeros. Note that unlike float and hybrid - // versions, bias is only used in layer normalization. - memset(gate, 0, n_batch * n_cell * sizeof(int16_t)); - // For each batch and cell: compute input_weight * input. - tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input, input_to_gate_bias, input_to_gate_weights, input_to_gate_scale_a, - input_to_gate_scale_b, n_batch, n_input, n_cell, 0, scratch5, gate, - nullptr); - // Note: no aux_input. - // For each batch and cell: compute recurrent_weight * output_state. - tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate( - output_state, recurrent_to_gate_bias, recurrent_to_gate_weights, - recurrent_to_gate_scale_a, recurrent_to_gate_scale_b, n_batch, n_output, - n_cell, 0, scratch5, gate, nullptr); - // For each batch and cell: compute cell_weight * cell_state (peephole LSTM) - if (use_peephole) { - tflite::tensor_utils::VectorBatchVectorCwiseProductAccumulate( - cell_to_gate_weights, n_output, cell_state, n_batch, - cell_to_gate_scale_a, cell_to_gate_scale_b, gate); - } - // Do layer normalization (if layer norm LSTM) - if (use_layer_norm) { - tflite::tensor_utils::ApplyLayerNorm( - gate, layer_norm_coefficients, layer_norm_bias, - layer_norm_input_scale_a, layer_norm_input_scale_b, - layer_norm_variance_guard, n_batch, n_cell, gate); +// Verify the LSTM internal tensor properties (e.g., type checks) +// Input/output/states/fc weights tensors are required for kernel evaulation. +// The state tensors should be variables. 
Variants of the standard LSTM +// are not supported here, therefore their corresponding tensors should be +// invalid +TfLiteStatus LstmTensors::ValidateTensorStatus(TfLiteContext* context) const { + // Verify certain tensor properties + // input tensor + TF_LITE_ENSURE(context, internal_tensors_[kLstmInputTensor] != nullptr); + // hidden state + TF_LITE_ENSURE(context, internal_tensors_[kLstmOutputStateTensor] != nullptr); + TF_LITE_ENSURE(context, + internal_tensors_[kLstmOutputStateTensor]->is_variable); + // hidden state becomes input so they must have the same type + TF_LITE_ENSURE_EQ(context, internal_tensors_[kLstmOutputStateTensor]->type, + internal_tensors_[kLstmInputTensor]->type); + // cell state + TF_LITE_ENSURE(context, internal_tensors_[kLstmCellStateTensor] != nullptr); + TF_LITE_ENSURE(context, internal_tensors_[kLstmCellStateTensor]->is_variable); + // output + TF_LITE_ENSURE(context, output_tensor_ != nullptr); + // output type is the same as the input type (activations) + TF_LITE_ENSURE_EQ(context, output_tensor_->type, + internal_tensors_[kLstmInputTensor]->type); + + // weight tensors (1-9, see lstm_shared for index definition) + const auto weight_type = + internal_tensors_[kLstmInputToForgetWeightsTensor]->type; + for (size_t i = 1; i < 9; i++) { + TF_LITE_ENSURE(context, internal_tensors_[i] != nullptr); + TF_LITE_ENSURE_EQ(context, internal_tensors_[i]->type, weight_type); } - // Apply activation - switch (activation) { - case kTfLiteActSigmoid: - - reference_integer_ops::Logistic( - 0 /*data->input_multiplier*/, 0 /*data->input_left_shift */, - n_batch * n_cell /*NumElements(input->dims)*/, - gate /* tflite::micro::GetTensorData(input) */, - gate /*tflite::micro::GetTensorData(output) */); - - break; - case kTfLiteActTanh: { - int32_t dims_data = n_batch * n_cell; - RuntimeShape tanh_inp_shape = RuntimeShape(1, &dims_data); - reference_integer_ops::Tanh(0, 0, tanh_inp_shape, gate, tanh_inp_shape, - gate); - } break; - default: - // Only Sigmoid or Tanh is used. - TFLITE_ASSERT_FALSE; + // bias tensors (12-15, see lstm_shared for index definition) + const auto bias_type = internal_tensors_[kLstmForgetGateBiasTensor]->type; + for (size_t i = 12; i < 16; i++) { + TF_LITE_ENSURE(context, internal_tensors_[i] != nullptr); + TF_LITE_ENSURE_EQ(context, internal_tensors_[i]->type, bias_type); } -} - -// Updates the LSTM cell state, used by both integer LSTM versions. -// Also see UpdateLstmCellFloat. -// -// Parameters: -// - n_batch, n_cell: sizes of vectors -// - cell_state: input/output vector, size n_batch*n_cell -// - cell_state_scale: scaling factor of cell state. -// - input_gate: input vector, size n_batch*n_cell. -// - forget_gate: input/scratch vector, size n_batch*n_cell, always modified. -// - cell_gate: input vector, size n_batch*n_cell. -// - use_cifg: use 1-forget_gate instead of input_gate. -// - clip: if > 0, clip the resulting cell state to [-clip, +clip]. -void UpdateLstmCellInteger(int n_batch, int n_cell, int16_t* cell_state, - int32_t cell_state_scale, const int16_t* input_gate, - int16_t* forget_gate, const int16_t* cell_gate, - bool use_cifg, int16_t clip) { - // Use the forget_gate array as scratch, as input_gate array is not allocated - // in CIFG case. (Be careful not to write to the scratch before reading the - // forget gate data.) 
- int16_t* scratch = forget_gate; - - tflite::tensor_utils::CwiseMul(forget_gate, cell_state, n_batch, n_cell, 15, - cell_state); - if (use_cifg) { - tflite::tensor_utils::Sub1Vector(forget_gate, n_batch * n_cell, scratch); - tflite::tensor_utils::CwiseMul(scratch, cell_gate, n_batch, n_cell, - 30 + cell_state_scale, scratch); - } else { - tflite::tensor_utils::CwiseMul(input_gate, cell_gate, n_batch, n_cell, - 30 + cell_state_scale, scratch); + // Tensors from LSTM variants are invalid + // No peephole + for (size_t i = 9; i < 12; i++) { + TF_LITE_ENSURE(context, internal_tensors_[i] == nullptr); } - tflite::tensor_utils::CwiseAdd(cell_state, scratch, n_batch, n_cell, - cell_state); - - if (clip > 0) { - tflite::tensor_utils::CwiseClipping(cell_state, n_batch * n_cell, clip); + // No projection + for (size_t i = 16; i < 18; i++) { + TF_LITE_ENSURE(context, internal_tensors_[i] == nullptr); } + // No internal layer norm + for (size_t i = 20; i < 24; i++) { + TF_LITE_ENSURE(context, internal_tensors_[i] == nullptr); + } + return kTfLiteOk; } -// Calculates the output state tensor of an LSTM step. See Float and hybrid -// versions as well. -// -// Parameters: -// - n_batch: batches: the number of distinct vectors in each array. -// - n_cell, n_output: sizes of vectors. -// - cell_state, output_gate: input vectors, size n_batch*n_cell. -// - cell_state_scale: scaling of cell_state. -// - hidden_scale_[a|b]: effective scale of cell_state.*output_gate -// - hidden_zp: zero_point for cell_state.*output_gate -// - projection_weights, proj_scale_[a|b], projection_bias: -// constant inputs, describing projection matrix and bias. -// - output_state_zp: zero point of output_state. (Input, calibrated value.) -// - quantized_proj_clip: if > 0, clip the output of the projection. -// - output_state: output vector, size n_batch*n_output. Must be contigous. -// - scratch0: scratch area of size n_batch*n_cell -// - scratch1: scratch area of size n_batch*n_cell -// - scratch2: scratch area used by MatrixBatchVectorMultiplyAccumulate -void CalculateLstmOutputInteger8x8_16( - int n_batch, int n_cell, int n_output, int16_t* cell_state, - int32_t cell_state_scale, const int16_t* output_gate, - int32_t hidden_scale_a, int32_t hidden_scale_b, int32_t hidden_zp, - const int8_t* projection_weights, int32_t proj_scale_a, - int32_t proj_scale_b, const int32_t* projection_bias, - int32_t output_state_zp, int8_t quantized_proj_clip, int8_t* output_state, - int16_t* scratch0, int8_t* scratch1, int32_t* scratch2) { - // Note: unlike float/hybrid, the activation is always Tanh. 
+namespace lstm_internal { - { - int32_t tanh_input_left_shift = (15 + cell_state_scale) - 3; - int32_t dims_data = n_batch * n_cell; - if (tanh_input_left_shift < 0) /* handling negative shift value */ - { - int32_t i; - tanh_input_left_shift = -tanh_input_left_shift; - for (i = 0; i < dims_data; i++) { - cell_state[i] = cell_state[i] >> tanh_input_left_shift; - } - tanh_input_left_shift = 0; +const int32_t kInt16Max = std::numeric_limits<int16_t>::max(); +const int32_t kInt16Min = std::numeric_limits<int16_t>::min(); + +void AddElementWise(const int16_t* input_1, const int16_t* input_2, int n_batch, + int n_input, int16_t* output) { + for (int batch = 0; batch < n_batch; ++batch) { + for (int i = 0; i < n_input; ++i) { + const int index = batch * n_input + i; + int32_t sum = input_1[index] + input_2[index]; + const int32_t sum_clamped = std::min(kInt16Max, std::max(kInt16Min, sum)); + output[index] = static_cast<int16_t>(sum_clamped); } - RuntimeShape tanh_inp_shape = RuntimeShape(1, &dims_data); - reference_integer_ops::Tanh(0, tanh_input_left_shift, tanh_inp_shape, - cell_state, tanh_inp_shape, scratch0); } - tflite::tensor_utils::CwiseMul(output_gate, scratch0, hidden_scale_a, - hidden_scale_b, n_batch, n_cell, hidden_zp, - scratch1); - - const bool use_projection = (projection_weights != nullptr); +} - if (use_projection) { - // Note: no bias like in float/hybrid - memset(output_state, 0, n_batch * n_output * sizeof(int8_t)); - tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate( - scratch1, projection_bias, projection_weights, proj_scale_a, - proj_scale_b, n_batch, n_cell, n_output, output_state_zp, scratch2, - output_state, nullptr); - if (quantized_proj_clip > 0) { - tflite::tensor_utils::CwiseClipping(output_state, n_batch * n_output, - quantized_proj_clip); +void AddElementWise(const float* input_1, const float* input_2, int n_batch, + int n_input, float* output) { + for (int batch = 0; batch < n_batch; ++batch) { + for (int i = 0; i < n_input; ++i) { + const int index = batch * n_input + i; + output[index] = input_1[index] + input_2[index]; } - } else { - std::memcpy(output_state, scratch1, n_batch * n_output * sizeof(int8_t)); } } -// Performs an LSTM batch inference step for input specified by input_ptr. -// The LSTM cell is specified by the pointers to its weights (*_weights_ptr) and -// biases (*_bias_ptr), and buffers (*_scratch), along with additional -// parameters: -// - params: various LSTM params including activation, clipping, etc., -// - n_batch: size of batch, -// - n_cell: number of cells (or units), -// - n_input: the input size, -// - n_aux_input: the auxiliary input size. -// - n_output: the output size. -// - output_batch_leading_dim: the leading dimension of the output buffer.
-// -// Input of size 'n_batch * n_input': -// input_ptr -// Input of size 'n_batch * n_aux_input': -// aux_input_ptr - optional (can be nullptr) -// -// LSTM weights: -// Input weights of size 'n_cell * n_input': -// input_to_input_weights - optional -// input_to_forget_weights -// input_to_cell_weights -// input_to_output_weights -// Auxiliary input weights of size 'n_cell * n_aux_input': -// aux_input_to_input_weights - optional -// aux_input_to_forget_weights - optional -// aux_input_to_cell_weights - optional -// aux_input_to_output_weights - optional -// Recurrent weights of size 'n_cell * n_output': -// recurrent_to_input_weights - optional -// recurrent_to_forget_weights -// recurrent_to_cell_weights -// recurrent_to_input_weights -// Peephole weights of size 'n_cell', representing diagonal matrices. -// cell_to_input_weights - optional -// cell_to_cell_weights - optional -// cell_to_output_weights - optional -// Projection weights of size 'n_output * n_cell' -// projection_weights_ptr - optional -// Gate biases of size 'n_cell': -// input_gate_bias_ptr - optional -// forget_gate_bias_ptr -// cell_gate_bias_ptr -// output_gate_bias_ptr -// -// Layer norm coefficients of size 'n_cell', representing diagonal matrices. -// input_layer_norm_coefficients_ptr - optional -// forget_layer_norm_coefficients_ptr - optional -// cell_layer_norm_coefficients_ptr - optional -// output_layer_norm_coefficients_ptr - optional -// -// The pointers to the cell and output state and the output are updated. -// -// The pointers input_ptr, aux_input_ptr, and output_ptr point to data aligned -// in batch_major order, and each step processes batch_size many inputs from -// input_ptr, and updates batch_size many cell and output states. -// -// The output_batch_dim is output.shape[-1], i.e. the outermost dimension of the -// output tensor, and in most cases will be equal to n_output. It is usually not -// when we want to store the LSTM output into a slice of the output tensor, e.g. -// for bidirectional LSTMs with merge_outputs. In this case, the batched -// operations cannot be used since they assume that the batched outputs are -// contiguous, and we manually loop over the batched outputs. 
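The note above about output_batch_leading_dim is why LstmStepFloat below copies its result one batch row at a time instead of with a single memcpy: the output tensor's rows can be spaced further apart than n_output (e.g. a bidirectional LSTM with merged outputs). A standalone sketch of that strided copy, mirroring the loop at the end of the removed function:

#include <cstring>

// Sketch only: copy one step's per-batch output rows into a buffer whose rows
// are output_batch_leading_dim floats apart (>= n_output).
void CopyStepOutputSketch(int n_batch, int n_output,
                          int output_batch_leading_dim,
                          const float* output_state, float* output) {
  for (int b = 0; b < n_batch; ++b) {
    std::memcpy(output + b * output_batch_leading_dim,
                output_state + b * n_output, n_output * sizeof(float));
  }
}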
-void LstmStepFloat( - const float* input_ptr, const float* input_to_input_weights_ptr, - const float* input_to_forget_weights_ptr, - const float* input_to_cell_weights_ptr, - const float* input_to_output_weights_ptr, const float* aux_input_ptr, - const float* aux_input_to_input_weights_ptr, - const float* aux_input_to_forget_weights_ptr, - const float* aux_input_to_cell_weights_ptr, - const float* aux_input_to_output_weights_ptr, - const float* recurrent_to_input_weights_ptr, - const float* recurrent_to_forget_weights_ptr, - const float* recurrent_to_cell_weights_ptr, - const float* recurrent_to_output_weights_ptr, - const float* cell_to_input_weights_ptr, - const float* cell_to_forget_weights_ptr, - const float* cell_to_output_weights_ptr, - const float* input_layer_norm_coefficients_ptr, - const float* forget_layer_norm_coefficients_ptr, - const float* cell_layer_norm_coefficients_ptr, - const float* output_layer_norm_coefficients_ptr, - const float* input_gate_bias_ptr, const float* forget_gate_bias_ptr, - const float* cell_gate_bias_ptr, const float* output_gate_bias_ptr, - const float* projection_weights_ptr, const float* projection_bias_ptr, - const TfLiteLSTMParams* params, int n_batch, int n_cell, int n_input, - int n_aux_input, int n_output, int output_batch_leading_dim, - float* output_state_ptr, float* cell_state_ptr, float* scratch0, - float* scratch1, float* scratch2, float* scratch3, float* output_ptr) { - // Since we have already checked that weights are all there or none, we can - // check the existence of only one to the get the condition. - const bool use_cifg = (input_to_input_weights_ptr == nullptr); +void Sigmoid(const RuntimeShape& data_shape, int16_t* data) { + reference_integer_ops::Logistic( + 0 /*data->input_multiplier*/, 0 /*data->input_left_shift */, + data_shape.FlatSize() /*NumElements(input->dims)*/, + data /* tflite::micro::GetTensorData(input) */, + data /*tflite::micro::GetTensorData(output) */); +} - // Make named scratch buffers. - float* input_gate_scratch = scratch0; - float* forget_gate_scratch = scratch1; - float* cell_gate_scratch = scratch2; - float* output_gate_scratch = scratch3; +void Sigmoid(const RuntimeShape& data_shape, float* data) { + reference_ops::Logistic(data_shape, data, data_shape, data); +} - // Check if inputs are all zeros so we can skip some computations. - const bool is_input_all_zeros = - tflite::tensor_utils::IsZeroVector(input_ptr, n_batch * n_input); - const bool is_aux_input_all_zeros = - (aux_input_ptr == nullptr || tflite::tensor_utils::IsZeroVector( - aux_input_ptr, n_batch * n_aux_input)); - if (!use_cifg) { - // Calculate the input gate. (If not CIFG.) - lstm_internal::CalculateLstmGateFloat( - input_ptr, input_to_input_weights_ptr, aux_input_ptr, - aux_input_to_input_weights_ptr, output_state_ptr, - recurrent_to_input_weights_ptr, cell_state_ptr, - cell_to_input_weights_ptr, input_layer_norm_coefficients_ptr, - input_gate_bias_ptr, n_batch, n_input, n_aux_input, n_output, n_cell, - /*activation=*/kTfLiteActSigmoid, input_gate_scratch, - is_input_all_zeros, is_aux_input_all_zeros); - } - // Calculate the forget gate. 
- lstm_internal::CalculateLstmGateFloat( - input_ptr, input_to_forget_weights_ptr, aux_input_ptr, - aux_input_to_forget_weights_ptr, output_state_ptr, - recurrent_to_forget_weights_ptr, cell_state_ptr, - cell_to_forget_weights_ptr, forget_layer_norm_coefficients_ptr, - forget_gate_bias_ptr, n_batch, n_input, n_aux_input, n_output, n_cell, - /*activation=*/kTfLiteActSigmoid, forget_gate_scratch, is_input_all_zeros, - is_aux_input_all_zeros); - // Calculate the cell update gate. - lstm_internal::CalculateLstmGateFloat( - input_ptr, input_to_cell_weights_ptr, aux_input_ptr, - aux_input_to_cell_weights_ptr, output_state_ptr, - recurrent_to_cell_weights_ptr, /*cell_state=*/nullptr, - /*cell_to_gate_weights=*/nullptr, cell_layer_norm_coefficients_ptr, - cell_gate_bias_ptr, n_batch, n_input, n_aux_input, n_output, n_cell, - params->activation, cell_gate_scratch, is_input_all_zeros, - is_aux_input_all_zeros); - // Update the cell state. - lstm_internal::UpdateLstmCellFloat( - n_batch, n_cell, cell_state_ptr, input_gate_scratch, forget_gate_scratch, - cell_gate_scratch, use_cifg, params->cell_clip); - // Calculate output gate. - lstm_internal::CalculateLstmGateFloat( - input_ptr, input_to_output_weights_ptr, aux_input_ptr, - aux_input_to_output_weights_ptr, output_state_ptr, - recurrent_to_output_weights_ptr, cell_state_ptr, - cell_to_output_weights_ptr, output_layer_norm_coefficients_ptr, - output_gate_bias_ptr, n_batch, n_input, n_aux_input, n_output, n_cell, - /*activation=*/kTfLiteActSigmoid, output_gate_scratch, is_input_all_zeros, - is_aux_input_all_zeros); - // Update the output state. - lstm_internal::CalculateLstmOutputFloat( - n_batch, n_cell, n_output, cell_state_ptr, output_gate_scratch, - params->activation, projection_weights_ptr, projection_bias_ptr, - params->proj_clip, output_state_ptr, scratch2); - // Copy output state to the output. Note that the output's rows may not be - // contiguous (output_batch_leading_dim != n_output). - for (int b = 0; b < n_batch; b++) { - std::memcpy(output_ptr + b * output_batch_leading_dim, - output_state_ptr + b * n_output, n_output * sizeof(float)); +void Tanh(int32_t cell_state_scale_power, const RuntimeShape& input_data_shape, + int16_t* input_data, const RuntimeShape& output_data_shape, + int16_t* output_data) { + int32_t tanh_input_left_shift = (15 + cell_state_scale_power) - 3; + int32_t input_multiplier = 0; + if (tanh_input_left_shift < 0) /* handling negative shift value */ + { + tanh_input_left_shift = -tanh_input_left_shift; + input_multiplier = 3; } + reference_integer_ops::Tanh(input_multiplier, tanh_input_left_shift, + input_data_shape, input_data, output_data_shape, + output_data); } -// Fully quantized lstm kernel for 16 bit gate matmul output. -// -// Input tensor of size n_batch * n_input: -// input_ptr -// -// LSTM weights: -// Quantized input weights of size 'n_cell * n_input': -// input_to_input_weight_ptr - optional -// input_to_forget_weight_ptr - optional -// input_to_cell_weight_ptr - optional -// input_to_output_weight_ptr - optional -// -// Quantized recurrent weights of size 'n_cell * n_output': -// recurrent_to_input_weight_ptr - optional -// recurrent_to_forget_weights_ptr -// recurrent_to_cell_weights_ptr -// recurrent_to_input_weights_ptr -// -// Quantized peephole weights of size 'n_cell', representing diagonal matrices. 
-// cell_to_input_weights - optional -// cell_to_cell_weights - optional -// cell_to_output_weights - optional -// -// Quantized projection weights of size 'n_output * n_cell' -// projection_weight_ptr - optional -// -// Weight scales (scalars) for each of the weights above. -// effective_input_to_input_scale_a - optional -// effective_input_to_input_scale_b - optional -// effective_input_to_forget_scale_a -// effective_input_to_forget_scale_b -// effective_input_to_cell_scale_a -// effective_input_to_cell_scale_b -// effective_input_to_output_scale_a -// effective_input_to_output_scale_b -// effective_recurrent_to_input_scale_a - optional -// effective_recurrent_to_input_scale_b - optional -// effective_recurrent_to_forget_scale_a -// effective_recurrent_to_forget_scale_b -// effective_recurrent_to_cell_scale_a -// effective_recurrent_to_cell_scale_b -// effective_recurrent_to_output_scale_a -// effective_recurrent_to_output_scale_b -// effective_proj_scale_a - optional -// effective_proj_scale_b - optional -// -// Gate biases of size 'n_cell': -// input_gate_bias_ptr - optional -// forget_gate_bias_ptr -// cell_gate_bias_ptr -// output_gate_bias_ptr -// -// Layer norm coefficients of size 'n_cell', representing diagonal matrices. -// layer_norm_input_weight_ptr - optional -// layer_norm_forget_weight_ptr - optional -// layer_norm_cell_weight_ptr - optional -// layer_norm_output_weight_ptr - optional -// -// Layer norm scales of size 'n_cell'. -// layer_norm_input_scale_a - optional -// layer_norm_input_scale_b - optional -// layer_norm_forget_scale_a - optional -// layer_norm_forget_scale_b - optional -// layer_norm_cell_scale_a - optional -// layer_norm_cell_scale_b - optional -// layer_norm_output_scale_a - optional -// layer_norm_output_scale_b - optional -// -// Scalar values: -// quantized_cell_clip: quantized clip value for cell. -// quantized_proj_clip: quantized clip value for projection. -// cell_state_scale: the power of two scale for cell state. -// -// Zero points: -// output_state_zp: zero point of output state -// hidden_zp: zero point for hidden state. -// -// Temporary pre-allocated storage for the calculation. Each is of size n_cell * -// n_batch. -// scratch0 -// scratch1 -// scratch2 -// scratch3 -// scratch4 -// scratch5: this scratch buffer is created purely for optimizing the -// MatrixBatchVectorMultiplyAccumulate. -// -// Outputs: -// output_state_ptr - size 'n_batch * n_output' -// cell_state_ptr - size 'n_batch * n_cell' -// output_ptr - size 'n_batch * n_output' -// TODO(b/159947023): scratch0 is not used if (!cifg). Don't allocate then. 
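Each effective_*_scale_a / effective_*_scale_b pair listed above encodes a real-valued rescaling factor as a 32-bit fixed-point multiplier plus a binary shift, the same multiplier-plus-shift form produced by helpers such as QuantizeMultiplier seen in the leaky_relu hunk earlier. A rough standalone sketch of how such a pair can be applied to a 32-bit accumulator (simplified rounding, not the library's exact gemmlowp-style routine; assumes shift < 31):

#include <cstdint>

// Sketch only: apply scale ~= multiplier * 2^(shift - 31), with multiplier
// typically in [2^30, 2^31), to an int32 accumulator via 64-bit intermediates.
int32_t ApplyEffectiveScaleSketch(int32_t acc, int32_t multiplier, int shift) {
  const int total_shift = 31 - shift;  // assumed > 0 (i.e. shift < 31)
  const int64_t product = static_cast<int64_t>(acc) * multiplier;
  const int64_t rounding = int64_t{1} << (total_shift - 1);
  return static_cast<int32_t>((product + rounding) >> total_shift);
}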
-void LstmStepInteger8x8_16( - const int8_t* input_ptr, const int8_t* input_to_input_weight_ptr, - int32_t effective_input_to_input_scale_a, - int32_t effective_input_to_input_scale_b, - const int8_t* input_to_forget_weight_ptr, - int32_t effective_input_to_forget_scale_a, - int32_t effective_input_to_forget_scale_b, - const int8_t* input_to_cell_weight_ptr, - int32_t effective_input_to_cell_scale_a, - int32_t effective_input_to_cell_scale_b, - const int8_t* input_to_output_weight_ptr, - int32_t effective_input_to_output_scale_a, - int32_t effective_input_to_output_scale_b, - const int8_t* recurrent_to_input_weight_ptr, - int32_t effective_recurrent_to_input_scale_a, - int32_t effective_recurrent_to_input_scale_b, - const int8_t* recurrent_to_forget_weight_ptr, - int32_t effective_recurrent_to_forget_scale_a, - int32_t effective_recurrent_to_forget_scale_b, - const int8_t* recurrent_to_cell_weight_ptr, - int32_t effective_recurrent_to_cell_scale_a, - int32_t effective_recurrent_to_cell_scale_b, - const int8_t* recurrent_to_output_weight_ptr, - int32_t effective_recurrent_to_output_scale_a, - int32_t effective_recurrent_to_output_scale_b, - const int16_t* cell_to_input_weight_ptr, - int32_t effective_cell_to_input_scale_a, - int32_t effective_cell_to_input_scale_b, - const int16_t* cell_to_forget_weight_ptr, - int32_t effective_cell_to_forget_scale_a, - int32_t effective_cell_to_forget_scale_b, - const int16_t* cell_to_output_weight_ptr, - int32_t effective_cell_to_output_scale_a, - int32_t effective_cell_to_output_scale_b, - const int8_t* projection_weight_ptr, int32_t effective_proj_scale_a, - int32_t effective_proj_scale_b, int32_t hidden_zp, - int32_t effective_hidden_scale_a, int32_t effective_hidden_scale_b, - const int16_t* layer_norm_input_weight_ptr, - int32_t layer_norm_input_scale_a, int32_t layer_norm_input_scale_b, - const int16_t* layer_norm_forget_weight_ptr, - int32_t layer_norm_forget_scale_a, int32_t layer_norm_forget_scale_b, - const int16_t* layer_norm_cell_weight_ptr, int32_t layer_norm_cell_scale_a, - int32_t layer_norm_cell_scale_b, - const int16_t* layer_norm_output_weight_ptr, - int32_t layer_norm_output_scale_a, int32_t layer_norm_output_scale_b, - const int32_t* input_gate_bias_ptr, const int32_t* forget_gate_bias_ptr, - const int32_t* cell_gate_bias_ptr, const int32_t* output_gate_bias_ptr, - int16_t quantized_cell_clip, int8_t quantized_proj_clip, - int32_t cell_state_scale, int32_t input_variance_guard, - int32_t forget_variance_guard, int32_t cell_variance_guard, - int32_t output_variance_guard, - const int32_t* input_to_forget_effective_bias, - const int32_t* recurrent_to_forget_effective_bias, - const int32_t* input_to_cell_effective_bias, - const int32_t* recurrent_to_cell_effective_bias, - const int32_t* input_to_output_effective_bias, - const int32_t* recurrent_to_output_effective_bias, - const int32_t* input_to_input_effective_bias, - const int32_t* recurrent_to_input_effective_bias, - const int32_t* projection_effective_bias, int n_batch, int n_cell, - int n_input, int n_output, int8_t* output_state_ptr, - int32_t output_state_zp, int16_t* cell_state_ptr, int8_t* output_ptr, - int16_t* scratch0, int16_t* scratch1, int16_t* scratch2, int16_t* scratch3, - int8_t* scratch4, int32_t* scratch5) { - // Make named scratch buffers for the different gates. 
- int16_t* input_gate_scratch = scratch0; - int16_t* forget_gate_scratch = scratch1; - int16_t* cell_gate_scratch = scratch2; - int16_t* output_gate_scratch = scratch3; +void Tanh(int32_t cell_state_scale_power, const RuntimeShape& input_data_shape, + float* input_data, const RuntimeShape& output_data_shape, + float* output_data) { + reference_ops::Tanh(input_data_shape, input_data, output_data_shape, + output_data); +} - // Since we have already checked that weights are all there or none, we - // can check the existence of only one to the get the condition. - const bool use_cifg = (input_to_input_weight_ptr == nullptr); +// Input and output have the same shape in LSTM +void Mul(const RuntimeShape& shape, const ArithmeticParams& params, + const int16_t* input1_data, const int16_t* input2_data, + int8_t* output_data) { + return reference_integer_ops::MulElementwise( + shape.FlatSize(), params, input1_data, input2_data, output_data); +} - // Check for nullptrs. - TFLITE_DCHECK(input_to_forget_effective_bias); - TFLITE_DCHECK(recurrent_to_forget_effective_bias); - TFLITE_DCHECK(input_to_cell_effective_bias); - TFLITE_DCHECK(recurrent_to_cell_effective_bias); - TFLITE_DCHECK(input_to_output_effective_bias); - TFLITE_DCHECK(recurrent_to_output_effective_bias); - if (!use_cifg) { - TFLITE_DCHECK(input_to_input_effective_bias); - TFLITE_DCHECK(recurrent_to_input_effective_bias); - } - const bool use_projection = (projection_weight_ptr != nullptr); - if (use_projection) { - TFLITE_DCHECK(projection_effective_bias); - } - if (!use_cifg) { - // Calculate the input gate. (If not CIFG.) - lstm_internal::CalculateLstmGateInteger8x8_16( - input_ptr, input_to_input_weight_ptr, input_to_input_effective_bias, - effective_input_to_input_scale_a, effective_input_to_input_scale_b, - output_state_ptr, recurrent_to_input_weight_ptr, - recurrent_to_input_effective_bias, effective_recurrent_to_input_scale_a, - effective_recurrent_to_input_scale_b, cell_state_ptr, - cell_to_input_weight_ptr, effective_cell_to_input_scale_a, - effective_cell_to_input_scale_b, layer_norm_input_weight_ptr, - input_gate_bias_ptr, layer_norm_input_scale_a, layer_norm_input_scale_b, - input_variance_guard, n_batch, n_input, n_output, n_cell, - kTfLiteActSigmoid, input_gate_scratch, scratch5); - } - // Calculate the forget gate. - lstm_internal::CalculateLstmGateInteger8x8_16( - input_ptr, input_to_forget_weight_ptr, input_to_forget_effective_bias, - effective_input_to_forget_scale_a, effective_input_to_forget_scale_b, - output_state_ptr, recurrent_to_forget_weight_ptr, - recurrent_to_forget_effective_bias, effective_recurrent_to_forget_scale_a, - effective_recurrent_to_forget_scale_b, cell_state_ptr, - cell_to_forget_weight_ptr, effective_cell_to_forget_scale_a, - effective_cell_to_forget_scale_b, layer_norm_forget_weight_ptr, - forget_gate_bias_ptr, layer_norm_forget_scale_a, - layer_norm_forget_scale_b, forget_variance_guard, n_batch, n_input, - n_output, n_cell, kTfLiteActSigmoid, forget_gate_scratch, scratch5); - // Calculate the cell update gate. 
- lstm_internal::CalculateLstmGateInteger8x8_16( - input_ptr, input_to_cell_weight_ptr, input_to_cell_effective_bias, - effective_input_to_cell_scale_a, effective_input_to_cell_scale_b, - output_state_ptr, recurrent_to_cell_weight_ptr, - recurrent_to_cell_effective_bias, effective_recurrent_to_cell_scale_a, - effective_recurrent_to_cell_scale_b, cell_state_ptr, - /*cell_to_gate_weights=*/nullptr, /*cell_to_gate_scale_a=*/0, - /*cell_to_gate_scale_b=*/0, layer_norm_cell_weight_ptr, - cell_gate_bias_ptr, layer_norm_cell_scale_a, layer_norm_cell_scale_b, - cell_variance_guard, n_batch, n_input, n_output, n_cell, kTfLiteActTanh, - cell_gate_scratch, scratch5); - // Update the cell state. - lstm_internal::UpdateLstmCellInteger( - n_batch, n_cell, cell_state_ptr, cell_state_scale, input_gate_scratch, - forget_gate_scratch, cell_gate_scratch, use_cifg, quantized_cell_clip); - // Calculate the output gate. - lstm_internal::CalculateLstmGateInteger8x8_16( - input_ptr, input_to_output_weight_ptr, input_to_output_effective_bias, - effective_input_to_output_scale_a, effective_input_to_output_scale_b, - output_state_ptr, recurrent_to_output_weight_ptr, - recurrent_to_output_effective_bias, effective_recurrent_to_output_scale_a, - effective_recurrent_to_output_scale_b, cell_state_ptr, - cell_to_output_weight_ptr, effective_cell_to_output_scale_a, - effective_cell_to_output_scale_b, layer_norm_output_weight_ptr, - output_gate_bias_ptr, layer_norm_output_scale_a, - layer_norm_output_scale_b, output_variance_guard, n_batch, n_input, - n_output, n_cell, kTfLiteActSigmoid, output_gate_scratch, scratch5); - // Update the output state. - lstm_internal::CalculateLstmOutputInteger8x8_16( - n_batch, n_cell, n_output, cell_state_ptr, cell_state_scale, - output_gate_scratch, effective_hidden_scale_a, effective_hidden_scale_b, - hidden_zp, projection_weight_ptr, effective_proj_scale_a, - effective_proj_scale_b, projection_effective_bias, output_state_zp, - quantized_proj_clip, output_state_ptr, scratch0, scratch4, scratch5); - // Copy output state to the output. Note that unlike float or hybrid, output - // is always contiguous. 
- std::memcpy(output_ptr, output_state_ptr, - n_batch * n_output * sizeof(int8_t)); +// Input and output have the same shape in LSTM +void Mul(const RuntimeShape& shape, const ArithmeticParams& params, + const int16_t* input1_data, const int16_t* input2_data, + int16_t* output_data) { + return reference_integer_ops::MulElementwise( + shape.FlatSize(), params, input1_data, input2_data, output_data); } -} // namespace lstm_internal +// Input and output have the same shape in LSTM +void Mul(const RuntimeShape& shape, const ArithmeticParams& params, + const float* input1_data, const float* input2_data, + float* output_data) { + return reference_ops::Mul(params, shape, input1_data, shape, input2_data, + shape, output_data); +} -TfLiteStatus EvalFloatLstm( - const TfLiteEvalTensor* input, - const TfLiteEvalTensor* input_to_input_weights, - const TfLiteEvalTensor* input_to_forget_weights, - const TfLiteEvalTensor* input_to_cell_weights, - const TfLiteEvalTensor* input_to_output_weights, - const TfLiteEvalTensor* recurrent_to_input_weights, - const TfLiteEvalTensor* recurrent_to_forget_weights, - const TfLiteEvalTensor* recurrent_to_cell_weights, - const TfLiteEvalTensor* recurrent_to_output_weights, - const TfLiteEvalTensor* cell_to_input_weights, - const TfLiteEvalTensor* cell_to_forget_weights, - const TfLiteEvalTensor* cell_to_output_weights, - const TfLiteEvalTensor* input_layer_norm_coefficients, - const TfLiteEvalTensor* forget_layer_norm_coefficients, - const TfLiteEvalTensor* cell_layer_norm_coefficients, - const TfLiteEvalTensor* output_layer_norm_coefficients, - const TfLiteEvalTensor* aux_input, - const TfLiteEvalTensor* aux_input_to_input_weights, - const TfLiteEvalTensor* aux_input_to_forget_weights, - const TfLiteEvalTensor* aux_input_to_cell_weights, - const TfLiteEvalTensor* aux_input_to_output_weights, - const TfLiteEvalTensor* input_gate_bias, - const TfLiteEvalTensor* forget_gate_bias, - const TfLiteEvalTensor* cell_gate_bias, - const TfLiteEvalTensor* output_gate_bias, - const TfLiteEvalTensor* projection_weights, - const TfLiteEvalTensor* projection_bias, const TfLiteLSTMParams* params, - bool forward_sequence, bool time_major, int output_offset, - float* scratch_buffer, TfLiteEvalTensor* output_state, - TfLiteEvalTensor* cell_state, TfLiteEvalTensor* output) { - TFLITE_DCHECK(input->dims->size >= 2 && input->dims->size <= 3); - int max_time, n_batch; - if (input->dims->size == 3) { - max_time = (time_major) ? input->dims->data[0] : input->dims->data[1]; - n_batch = (time_major) ? input->dims->data[1] : input->dims->data[0]; - } else { - max_time = 1; - n_batch = input->dims->data[0]; - } - const int n_input = input->dims->data[input->dims->size - 1]; - const int aux_input_size = - (aux_input) ? aux_input->dims->data[aux_input->dims->size - 1] : 0; +void FullyConnected(const FullyConnectedParams& params, + const RuntimeShape& input_shape, const int8_t* input_data, + const RuntimeShape& filter_shape, const int8_t* filter_data, + const RuntimeShape& bias_shape, const int32_t* bias_data, + const RuntimeShape& output_shape, int16_t* output_data) { + return tflite::reference_integer_ops::FullyConnected( + params, input_shape, input_data, filter_shape, filter_data, bias_shape, + bias_data, output_shape, output_data); +} - // n_cell and n_output will be the same size when there is no projection. 
- const int n_cell = input_to_output_weights->dims->data[0]; - const int n_output = recurrent_to_output_weights->dims->data[1]; +void FullyConnected(const FullyConnectedParams& params, + const RuntimeShape& input_shape, const int16_t* input_data, + const RuntimeShape& filter_shape, const int8_t* filter_data, + const RuntimeShape& bias_shape, const int64_t* bias_data, + const RuntimeShape& output_shape, int16_t* output_data) { + return tflite::reference_integer_ops::FullyConnected( + params, input_shape, input_data, filter_shape, filter_data, bias_shape, + bias_data, output_shape, output_data); +} - // Since we have already checked that weights are all there or none, we can - // check the existence of only one to the get the condition. - const bool use_cifg = (input_to_input_weights == nullptr); +void FullyConnected(const FullyConnectedParams& params, + const RuntimeShape& input_shape, const float* input_data, + const RuntimeShape& filter_shape, const float* filter_data, + const RuntimeShape& bias_shape, const float* bias_data, + const RuntimeShape& output_shape, float* output_data) { + return tflite::reference_ops::FullyConnected( + params, input_shape, input_data, filter_shape, filter_data, bias_shape, + bias_data, output_shape, output_data); +} - // Index the scratch buffers pointers to the global scratch buffer. - float* input_gate_scratch = nullptr; - float* cell_gate_scratch = nullptr; - float* forget_gate_scratch = nullptr; - float* output_gate_scratch = nullptr; - if (use_cifg) { - cell_gate_scratch = scratch_buffer; - forget_gate_scratch = scratch_buffer + n_cell * n_batch; - output_gate_scratch = scratch_buffer + 2 * n_cell * n_batch; - } else { - input_gate_scratch = scratch_buffer; - cell_gate_scratch = scratch_buffer + n_cell * n_batch; - forget_gate_scratch = scratch_buffer + 2 * n_cell * n_batch; - output_gate_scratch = scratch_buffer + 3 * n_cell * n_batch; +void Clipping(const int v_size, const CellStateInfo& cell_state_info, + int16_t* vector) { + for (int i = 0; i < v_size; i++) { + vector[i] = + std::max(std::min(cell_state_info.quantized_cell_clip, vector[i]), + static_cast(-cell_state_info.quantized_cell_clip)); } +} - const int output_batch_leading_dim = - output->dims->data[output->dims->size - 1]; - if (time_major) { - // Loop through the sequence. - const int input_step = n_batch * n_input; - const int output_step = n_batch * output_batch_leading_dim; - for (int t = 0; t < max_time; t++) { - // If this is the forward_sequence, step forward, otherwise step - // backwards. - const int t_rel = forward_sequence ? t : max_time - t - 1; - const float* input_ptr = - tflite::micro::GetTensorData(input) + t_rel * input_step; - const float* aux_input_ptr = nullptr; - if (aux_input) { - aux_input_ptr = - tflite::micro::GetTensorData(aux_input) + t_rel * input_step; - } - float* output_ptr = tflite::micro::GetTensorData(output) + - t_rel * output_step + output_offset; - - lstm_internal::LstmStepFloat( - input_ptr, - input_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_input_weights), - input_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_forget_weights), - input_to_cell_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_cell_weights), - input_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_output_weights), - aux_input_ptr, - aux_input_to_input_weights == nullptr - ? 
nullptr - : tflite::micro::GetTensorData(aux_input_to_input_weights), - aux_input_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - aux_input_to_forget_weights), - aux_input_to_cell_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(aux_input_to_cell_weights), - aux_input_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - aux_input_to_output_weights), - recurrent_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(recurrent_to_input_weights), - recurrent_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_forget_weights), - recurrent_to_cell_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(recurrent_to_cell_weights), - recurrent_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_output_weights), - cell_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_to_input_weights), - cell_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_to_forget_weights), - cell_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_to_output_weights), - input_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - input_layer_norm_coefficients), - forget_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - forget_layer_norm_coefficients), - cell_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - cell_layer_norm_coefficients), - output_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - output_layer_norm_coefficients), - input_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_gate_bias), - forget_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(forget_gate_bias), - cell_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_gate_bias), - output_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(output_gate_bias), - projection_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(projection_weights), - projection_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(projection_bias), - params, n_batch, n_cell, n_input, aux_input_size, n_output, - output_batch_leading_dim, - tflite::micro::GetTensorData(output_state), - tflite::micro::GetTensorData(cell_state), input_gate_scratch, - forget_gate_scratch, cell_gate_scratch, output_gate_scratch, - output_ptr); - } - } else { - for (int b = 0; b < n_batch; b++) { - const int input_step = n_input; - const int output_step = output_batch_leading_dim; - for (int t = 0; t < max_time; t++) { - // If this is the forward_sequence, step forward, otherwise step - // backwards. - const int t_rel = forward_sequence ? t : max_time - t - 1; - const int time_offset = b * max_time + t_rel; - const float* input_ptr = tflite::micro::GetTensorData(input) + - time_offset * input_step; - const float* aux_input_ptr = nullptr; - if (aux_input) { - aux_input_ptr = tflite::micro::GetTensorData(aux_input) + - time_offset * input_step; - } - float* output_ptr = tflite::micro::GetTensorData(output) + - time_offset * output_step + output_offset; - - // Offset the {output,cell}_state pointers to the right batch. 
- float* output_state_ptr = - tflite::micro::GetTensorData(output_state) + - b * output_batch_leading_dim; - float* cell_state_ptr = - tflite::micro::GetTensorData(cell_state) + b * n_cell; - // Offset the scratch pointers to the right batch. - float* input_gate_scratch_ptr = - input_gate_scratch ? input_gate_scratch + b * n_cell : nullptr; - float* forget_gate_scratch_ptr = forget_gate_scratch + b * n_cell; - float* cell_gate_scratch_ptr = cell_gate_scratch + b * n_cell; - float* output_gate_scratch_ptr = output_gate_scratch + b * n_cell; - - lstm_internal::LstmStepFloat( - input_ptr, - input_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_input_weights), - input_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_forget_weights), - input_to_cell_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_cell_weights), - input_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_output_weights), - aux_input_ptr, - aux_input_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - aux_input_to_input_weights), - aux_input_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - aux_input_to_forget_weights), - aux_input_to_cell_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - aux_input_to_cell_weights), - aux_input_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - aux_input_to_output_weights), - recurrent_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_input_weights), - recurrent_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_forget_weights), - recurrent_to_cell_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_cell_weights), - recurrent_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_output_weights), - cell_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_to_input_weights), - cell_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_to_forget_weights), - cell_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_to_output_weights), - input_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - input_layer_norm_coefficients), - forget_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - forget_layer_norm_coefficients), - cell_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - cell_layer_norm_coefficients), - output_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - output_layer_norm_coefficients), - input_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_gate_bias), - forget_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(forget_gate_bias), - cell_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_gate_bias), - output_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(output_gate_bias), - projection_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(projection_weights), - projection_bias == nullptr - ? 
nullptr - : tflite::micro::GetTensorData(projection_bias), - params, - /*n_batch=*/1, n_cell, n_input, aux_input_size, n_output, - output_batch_leading_dim, output_state_ptr, cell_state_ptr, - input_gate_scratch_ptr, forget_gate_scratch_ptr, - cell_gate_scratch_ptr, output_gate_scratch_ptr, output_ptr); - } - } +void Clipping(const int v_size, const CellStateInfo& cell_state_info, + float* vector) { + for (int i = 0; i < v_size; i++) { + vector[i] = std::max(std::min(cell_state_info.cell_clip, vector[i]), + -cell_state_info.cell_clip); } - return kTfLiteOk; } -TfLiteStatus EvalInteger8x8_16Lstm( - const TfLiteEvalTensor* input, - const TfLiteEvalTensor* input_to_input_weights, - const TfLiteEvalTensor* input_to_forget_weights, - const TfLiteEvalTensor* input_to_cell_weights, - const TfLiteEvalTensor* input_to_output_weights, - const TfLiteEvalTensor* recurrent_to_input_weights, - const TfLiteEvalTensor* recurrent_to_forget_weights, - const TfLiteEvalTensor* recurrent_to_cell_weights, - const TfLiteEvalTensor* recurrent_to_output_weights, - const TfLiteEvalTensor* cell_to_input_weights, - const TfLiteEvalTensor* cell_to_forget_weights, - const TfLiteEvalTensor* cell_to_output_weights, - const TfLiteEvalTensor* input_layer_norm_coefficients, - const TfLiteEvalTensor* forget_layer_norm_coefficients, - const TfLiteEvalTensor* cell_layer_norm_coefficients, - const TfLiteEvalTensor* output_layer_norm_coefficients, - const TfLiteEvalTensor* input_gate_bias, - const TfLiteEvalTensor* forget_gate_bias, - const TfLiteEvalTensor* cell_gate_bias, - const TfLiteEvalTensor* output_gate_bias, - const TfLiteEvalTensor* projection_weights, - const TfLiteEvalTensor* projection_bias, const TfLiteLSTMParams* params, - bool forward_sequence, bool time_major, - const IntegerLstmParameter* integer_lstm_param, int32_t output_state_zp, - TfLiteEvalTensor* output_state, TfLiteEvalTensor* cell_state, - TfLiteEvalTensor* output, int16_t* scratch0, int16_t* scratch1, - int16_t* scratch2, int16_t* scratch3, int8_t* scratch4, int32_t* scratch5) { - TFLITE_DCHECK(input->dims->size >= 2 && input->dims->size <= 3); - const int n_input = input->dims->data[input->dims->size - 1]; - int max_time, n_batch; - if (input->dims->size == 2) { - max_time = 1; - n_batch = input->dims->data[0]; - } else { - max_time = (time_major) ? input->dims->data[0] : input->dims->data[1]; - n_batch = (time_major) ? input->dims->data[1] : input->dims->data[0]; +// Increment the data offset so the sigle time step invocation call can access +// the corresponding input/output tensor data at the time step +void LstmStepManager::UpdateTime() { + current_time_ += 1; + TFLITE_DCHECK_LE(current_time_, size_info_.time_steps); + // default as one batch per inference + int input_step = size_info_.input_dimension; + int output_step = size_info_.state_dimension; + // time major: batch inference + if (size_info_.time_major) { + input_step = input_step * size_info_.batch_size; + output_step = output_step * size_info_.batch_size; } - // n_cell and n_output will be the same size when there is no projection. - const int n_cell = input_to_output_weights->dims->data[0]; - const int n_output = recurrent_to_output_weights->dims->data[1]; - - // Get params for time/batch/sequence. 
- const int output_batch_leading_dim = - output->dims->data[output->dims->size - 1]; - - if (time_major) { - const int input_step = n_batch * n_input; - const int output_step = n_batch * output_batch_leading_dim; - for (int t = 0; t < max_time; t++) { - const int t_rel = t; - int8_t* output_ptr = - tflite::micro::GetTensorData(output) + t_rel * output_step; - const int8_t* input_ptr = - tflite::micro::GetTensorData(input) + t_rel * input_step; - lstm_internal::LstmStepInteger8x8_16( - input_ptr, - input_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_input_weights), - integer_lstm_param->effective_input_to_input_scale_a, - integer_lstm_param->effective_input_to_input_scale_b, - input_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_forget_weights), - integer_lstm_param->effective_input_to_forget_scale_a, - integer_lstm_param->effective_input_to_forget_scale_b, - input_to_cell_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_cell_weights), - integer_lstm_param->effective_input_to_cell_scale_a, - integer_lstm_param->effective_input_to_cell_scale_b, - input_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_output_weights), - integer_lstm_param->effective_input_to_output_scale_a, - integer_lstm_param->effective_input_to_output_scale_b, - recurrent_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_input_weights), - integer_lstm_param->effective_recurrent_to_input_scale_a, - integer_lstm_param->effective_recurrent_to_input_scale_b, - recurrent_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_forget_weights), - integer_lstm_param->effective_recurrent_to_forget_scale_a, - integer_lstm_param->effective_recurrent_to_forget_scale_b, - recurrent_to_cell_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(recurrent_to_cell_weights), - integer_lstm_param->effective_recurrent_to_cell_scale_a, - integer_lstm_param->effective_recurrent_to_cell_scale_b, - recurrent_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_output_weights), - integer_lstm_param->effective_recurrent_to_output_scale_a, - integer_lstm_param->effective_recurrent_to_output_scale_b, - cell_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_to_input_weights), - integer_lstm_param->effective_cell_to_input_scale_a, - integer_lstm_param->effective_cell_to_input_scale_b, - cell_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_to_forget_weights), - integer_lstm_param->effective_cell_to_forget_scale_a, - integer_lstm_param->effective_cell_to_forget_scale_b, - cell_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_to_output_weights), - integer_lstm_param->effective_cell_to_output_scale_a, - integer_lstm_param->effective_cell_to_output_scale_b, - projection_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(projection_weights), - integer_lstm_param->effective_proj_scale_a, - integer_lstm_param->effective_proj_scale_b, - integer_lstm_param->hidden_zp, - integer_lstm_param->effective_hidden_scale_a, - integer_lstm_param->effective_hidden_scale_b, - input_layer_norm_coefficients == nullptr - ? 
nullptr - : tflite::micro::GetTensorData( - input_layer_norm_coefficients), - integer_lstm_param->layer_norm_input_scale_a, - integer_lstm_param->layer_norm_input_scale_b, - forget_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - forget_layer_norm_coefficients), - integer_lstm_param->layer_norm_forget_scale_a, - integer_lstm_param->layer_norm_forget_scale_b, - cell_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - cell_layer_norm_coefficients), - integer_lstm_param->layer_norm_cell_scale_a, - integer_lstm_param->layer_norm_cell_scale_b, - output_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - output_layer_norm_coefficients), - integer_lstm_param->layer_norm_output_scale_a, - integer_lstm_param->layer_norm_output_scale_b, - input_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_gate_bias), - forget_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(forget_gate_bias), - cell_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_gate_bias), - output_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(output_gate_bias), - integer_lstm_param->quantized_cell_clip, - integer_lstm_param->quantized_proj_clip, - integer_lstm_param->cell_scale, - integer_lstm_param->input_variance_guard, - integer_lstm_param->forget_variance_guard, - integer_lstm_param->cell_variance_guard, - integer_lstm_param->output_variance_guard, - integer_lstm_param->input_to_forget_effective_bias, - integer_lstm_param->recurrent_to_forget_effective_bias, - integer_lstm_param->input_to_cell_effective_bias, - integer_lstm_param->recurrent_to_cell_effective_bias, - integer_lstm_param->input_to_output_effective_bias, - integer_lstm_param->recurrent_to_output_effective_bias, - integer_lstm_param->input_to_input_effective_bias, - integer_lstm_param->recurrent_to_input_effective_bias, - integer_lstm_param->projection_effective_bias, n_batch, n_cell, - n_input, n_output, tflite::micro::GetTensorData(output_state), - output_state_zp, tflite::micro::GetTensorData(cell_state), - output_ptr, scratch0, scratch1, scratch2, scratch3, scratch4, - scratch5); - } - } else { - for (int b = 0; b < n_batch; b++) { - const int input_step = n_input; - const int output_step = output_batch_leading_dim; - for (int t = 0; t < max_time; t++) { - // If this is the forward_sequence, step forward, otherwise step - // backwards. - const int t_rel = forward_sequence ? t : max_time - t - 1; - const int time_offset = b * max_time + t_rel; - const int8_t* input_ptr = tflite::micro::GetTensorData(input) + - time_offset * input_step; - int8_t* output_ptr = tflite::micro::GetTensorData(output) + - time_offset * output_step; + input_offset_ += input_step; + output_offset_ += output_step; +} - // Offset the {output,cell}_state pointers to the right batch. 
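[Editor's note, illustrative only, not part of the patch] A worked example of the offset bookkeeping done by UpdateTime() above and UpdateBatch() below, assuming the LstmSizeInfo fields used elsewhere in this diff (time_major, batch_size, time_steps, input_dimension, state_dimension):

// Batch-major input, B = 2, T = 3, I = 4, S = 5.
tflite::LstmSizeInfo info;
info.time_major = false;
info.batch_size = 2;
info.time_steps = 3;
info.input_dimension = 4;
info.state_dimension = 5;

tflite::lstm_internal::LstmStepManager step(&info);
step.UpdateTime();   // InputOffset(): 0 -> 4,  OutputOffset(): 0 -> 5
step.UpdateTime();   // InputOffset(): 4 -> 8,  OutputOffset(): 5 -> 10
step.UpdateBatch();  // HiddenStateOffset()/CellStateOffset(): 0 -> 5

With time_major == true, each UpdateTime() instead advances the input and output offsets by B*I = 8 and B*S = 10, and UpdateBatch() leaves the state offsets untouched because all batches are processed together in one step.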
- int8_t* output_state_ptr = - tflite::micro::GetTensorData(output_state) + - b * output_batch_leading_dim; - int16_t* cell_state_ptr = - tflite::micro::GetTensorData(cell_state) + b * n_cell; +// Increment the data offset so the sigle time step invocation call can access +// the corresponding hidden/cell state tensor data at the time step (for single +// batch inference only) +void LstmStepManager::UpdateBatch() { + current_batch_ += 1; + TFLITE_DCHECK_LE(current_batch_, size_info_.batch_size); + // batch inference for time major: no action needed + if (size_info_.time_major) { + return; + } + // otherwise: singe batch inference, go to the next batch + hidden_state_offset_ += size_info_.state_dimension; + cell_state_offset_ += size_info_.state_dimension; +} - lstm_internal::LstmStepInteger8x8_16( - input_ptr, - input_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_input_weights), - integer_lstm_param->effective_input_to_input_scale_a, - integer_lstm_param->effective_input_to_input_scale_b, - input_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_forget_weights), - integer_lstm_param->effective_input_to_forget_scale_a, - integer_lstm_param->effective_input_to_forget_scale_b, - input_to_cell_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_cell_weights), - integer_lstm_param->effective_input_to_cell_scale_a, - integer_lstm_param->effective_input_to_cell_scale_b, - input_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_output_weights), - integer_lstm_param->effective_input_to_output_scale_a, - integer_lstm_param->effective_input_to_output_scale_b, - recurrent_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_input_weights), - integer_lstm_param->effective_recurrent_to_input_scale_a, - integer_lstm_param->effective_recurrent_to_input_scale_b, - recurrent_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_forget_weights), - integer_lstm_param->effective_recurrent_to_forget_scale_a, - integer_lstm_param->effective_recurrent_to_forget_scale_b, - recurrent_to_cell_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_cell_weights), - integer_lstm_param->effective_recurrent_to_cell_scale_a, - integer_lstm_param->effective_recurrent_to_cell_scale_b, - recurrent_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_output_weights), - integer_lstm_param->effective_recurrent_to_output_scale_a, - integer_lstm_param->effective_recurrent_to_output_scale_b, - cell_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_to_input_weights), - integer_lstm_param->effective_cell_to_input_scale_a, - integer_lstm_param->effective_cell_to_input_scale_b, - cell_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_to_forget_weights), - integer_lstm_param->effective_cell_to_forget_scale_a, - integer_lstm_param->effective_cell_to_forget_scale_b, - cell_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_to_output_weights), - integer_lstm_param->effective_cell_to_output_scale_a, - integer_lstm_param->effective_cell_to_output_scale_b, - projection_weights == nullptr - ? 
nullptr - : tflite::micro::GetTensorData(projection_weights), - integer_lstm_param->effective_proj_scale_a, - integer_lstm_param->effective_proj_scale_b, - integer_lstm_param->hidden_zp, - integer_lstm_param->effective_hidden_scale_a, - integer_lstm_param->effective_hidden_scale_b, - input_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - input_layer_norm_coefficients), - integer_lstm_param->layer_norm_input_scale_a, - integer_lstm_param->layer_norm_input_scale_b, - forget_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - forget_layer_norm_coefficients), - integer_lstm_param->layer_norm_forget_scale_a, - integer_lstm_param->layer_norm_forget_scale_b, - cell_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - cell_layer_norm_coefficients), - integer_lstm_param->layer_norm_cell_scale_a, - integer_lstm_param->layer_norm_cell_scale_b, - output_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - output_layer_norm_coefficients), - integer_lstm_param->layer_norm_output_scale_a, - integer_lstm_param->layer_norm_output_scale_b, - input_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_gate_bias), - forget_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(forget_gate_bias), - cell_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_gate_bias), - output_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(output_gate_bias), - integer_lstm_param->quantized_cell_clip, - integer_lstm_param->quantized_proj_clip, - integer_lstm_param->cell_scale, - integer_lstm_param->input_variance_guard, - integer_lstm_param->forget_variance_guard, - integer_lstm_param->cell_variance_guard, - integer_lstm_param->output_variance_guard, - integer_lstm_param->input_to_forget_effective_bias, - integer_lstm_param->recurrent_to_forget_effective_bias, - integer_lstm_param->input_to_cell_effective_bias, - integer_lstm_param->recurrent_to_cell_effective_bias, - integer_lstm_param->input_to_output_effective_bias, - integer_lstm_param->recurrent_to_output_effective_bias, - integer_lstm_param->input_to_input_effective_bias, - integer_lstm_param->recurrent_to_input_effective_bias, - integer_lstm_param->projection_effective_bias, /*n_batch=*/1, - n_cell, n_input, n_output, output_state_ptr, output_state_zp, - cell_state_ptr, output_ptr, scratch0, scratch1, scratch2, scratch3, - scratch4, scratch5); - } - } +// Input shape for each single time LSTM invocation. +// Multi-batch for time_major input +RuntimeShape LstmStepManager::InputShape() const { + int batch_size = 1; + if (size_info_.time_major) { + batch_size = size_info_.batch_size; } + const int dims[2] = {batch_size, size_info_.input_dimension}; + const int32_t* dims_data = reinterpret_cast(dims); + return RuntimeShape(2, dims_data); +} - return kTfLiteOk; +// State shape (both hidden and cell) for each single time LSTM invocation. 
+// Multi-batch for time_major input +RuntimeShape LstmStepManager::StateShape() const { + int batch_size = 1; + if (size_info_.time_major) { + batch_size = size_info_.batch_size; + } + const int dims[2] = {batch_size, size_info_.state_dimension}; + const int32_t* dims_data = reinterpret_cast(dims); + return RuntimeShape(2, dims_data); } -} // namespace tflite \ No newline at end of file +} // namespace lstm_internal +} // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval.h index 7794adb5a..62bc6354e 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval.h @@ -1,4 +1,4 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -12,304 +12,530 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_H_ -#define TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_H_ +// Functions to perform integer evaulation for standard LSTM (e.g., defined in +// the keras lstm layer, no peephole etc.). Currently used by the 16 bits +// activation case only + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_GENERAL_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_GENERAL_H_ +#include #include -#include #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/lstm_shared.h" +#include "tensorflow/lite/micro/micro_log.h" namespace tflite { + +// Interface to access all the TempTfLiteTensors of the LSTM kernel during the +// preparation phase. Can only be constructed through the constructor to avoid +// memory leakage. All TempTfLiteTensors will be deallocated through the +// destructor. +class LstmTensors { + public: + LstmTensors(const LstmTensors& other) = delete; + LstmTensors& operator=(const LstmTensors& other) = delete; + + LstmTensors(TfLiteContext* context, TfLiteNode* node); + ~LstmTensors(); + + // Verify the LSTM internal tensor properties (e.g., type checks) + // Input/output/states/fc weights tensors are required for kernel evaulation. + // The state tensors should be variables. Variants of the standard LSTM + // are not supported here, therefore their corresponding tensors should be + // invalid + TfLiteStatus ValidateTensorStatus(TfLiteContext* context) const; + + // Internal tensors. 
see lstm_shared.h for tensor names + const TfLiteTensor* GetInternalTensor(const int tensor_index) const { + return internal_tensors_[tensor_index]; + } + + const TfLiteTensor* HiddenStateTensor() const { + return internal_tensors_[kLstmOutputStateTensor]; + } + const TfLiteTensor* CellStateTensor() const { + return internal_tensors_[kLstmCellStateTensor]; + } + const TfLiteTensor* OutputTensor() const { return output_tensor_; } + + private: + // see lstm_shared.h for tensor names + MicroContext* micro_context_; + TfLiteTensor* internal_tensors_[24]; + TfLiteTensor* output_tensor_; +}; + +// Deduce the size information (Batch (B), Time Steps (T), Input dimension (I), +// State dimension (S)) that defines the LSTM using the input and hidden state +// tensor +LstmSizeInfo CreateLstmSizeInfo( + const bool time_major, const TfLiteIntArray* input_tensor_shape, + const TfLiteIntArray* hidden_state_tensor_shape); + +TfLiteStatus ValidateWeightTensorSize(TfLiteContext* context, + const TfLiteTensor* tensor, int dim1_size, + int dim2_size); + +TfLiteStatus ValidateBiasTensorSize(TfLiteContext* context, + const TfLiteTensor* tensor, int size); + +// Go through every tensors and make sure their shape match the kernel +// configuration +TfLiteStatus ValidateTensorSize(TfLiteContext* context, + const LstmTensors& tensors, + const LstmSizeInfo& size_info); + +// Wrapper function to create gate parameters for the four internal LSTM gates +TfLiteStatus CreateGateParams( + TfLiteContext* context, + /*Input tensors*/ + const TfLiteTensor* input, const TfLiteTensor* input_weight, + const TfLiteTensor* input_bias, + /*Hidden state tensors*/ + const TfLiteTensor* hidden_state, const TfLiteTensor* hidden_state_weight, + const TfLiteTensor* hidden_state_bias, + /*Scale of the fc output (input to non-linear activation)*/ + const float nonlinear_activation_input_scale, const TfLiteType cell_type, + const tflite::GateParameters& gate_params); + +// Create parameters for element wise multiplication that happens in a) cell +// state update ; b) hidden state update +// Note that all the output of gates are symmetrically quantized so only scales +// are required for input. However, during the hidden state update phase, the +// output is the updated hidden state, which is asymmetrically quantized. 
Thus +// output may require zero point +tflite::ArithmeticParams CreateInterGateMulParams(const float input1_scale, + const float input2_scale, + const float output_scale, + const TfLiteType output_type, + const int output_zp = 0); + +// Create the additional information about the cell state, which include: +// cell_state_scale_power: used in integer nonlinear function (e.g., tanh) +// quantized_cell_clip: quantized cell clip range +CellStateInfo CreateLstmCellStateInfo(const float cell_state_scale, + const float cell_clip); + +CellStateInfo CreateLstmCellStateInfoFloat(const float cell_clip); +tflite::FullyConnectedParams CreateFCParamsFloat(); + +tflite::GateParameters CreateGateParamsFloat(); + +tflite::ArithmeticParams CreateInterGateMulParamsFloat(); + +TfLiteStatus PrepareGateParametersFloat(TfLiteContext* context, + const LstmTensors& lstm_tensors, + OpDataLSTM* op_data_lstm); + +TfLiteStatus PrepareGateParametersInteger(TfLiteContext* context, + const LstmTensors& lstm_tensors, + OpDataLSTM* op_data_lstm); + +LSTMKernelContents CreateLSTMKernelContent(TfLiteContext* context, + TfLiteNode* node); + +template +LSTMBuffers CreateLSTMBuffers(TfLiteContext* context, + const int* buffer_indices) { + LSTMBuffers buffers; + buffers.buffer0 = reinterpret_cast( + context->GetScratchBuffer(context, buffer_indices[0])); + buffers.buffer1 = reinterpret_cast( + context->GetScratchBuffer(context, buffer_indices[1])); + buffers.buffer2 = reinterpret_cast( + context->GetScratchBuffer(context, buffer_indices[2])); + buffers.buffer3 = reinterpret_cast( + context->GetScratchBuffer(context, buffer_indices[3])); + return buffers; +} + // Since LSTM includes multiple intermediate stages, introducing the internal // namespace to expose them for testing namespace lstm_internal { -void CalculateLstmGateFloat( - const float* input, const float* input_to_gate_weights, - const float* aux_input, const float* aux_input_to_gate_weights, - const float* output_state, const float* recurrent_to_gate_weights, - const float* cell_state, const float* cell_to_gate_weights, - const float* layer_norm_coefficients, const float* gate_bias, - const int n_batch, const int n_input, const int n_aux_input, - const int n_output, const int n_cell, - const TfLiteFusedActivation activation, float* gate, - const bool is_input_all_zeros, const bool is_aux_input_all_zeros); - -void UpdateLstmCellFloat(int n_batch, int n_cell, float* cell_state, - const float* input_gate, float* forget_gate, - const float* cell_gate, bool use_cifg, float clip); - -void CalculateLstmOutputFloat(int n_batch, int n_cell, int n_output, - const float* cell_state, const float* output_gate, - TfLiteFusedActivation activation, - const float* projection_weights, - const float* projection_bias, - const float proj_clip, float* output_state, - float* scratch); - -void CalculateLstmGateInteger8x8_16( - // Input and weights - const int8_t* input, const int8_t* input_to_gate_weights, - const int32_t* input_to_gate_bias, const int32_t input_to_gate_scale_a, - const int32_t input_to_gate_scale_b, - // Output state and weights - const int8_t* output_state, const int8_t* recurrent_to_gate_weights, - const int32_t* recurrent_to_gate_bias, - const int32_t recurrent_to_gate_scale_a, - const int32_t recurrent_to_gate_scale_b, - // Cell state and weights - const int16_t* cell_state, const int16_t* cell_to_gate_weights, - const int32_t cell_to_gate_scale_a, const int32_t cell_to_gate_scale_b, - // Layer normalization parameters (layer norm LSTM) - const int16_t* 
layer_norm_coefficients, const int32_t* layer_norm_bias, - const int32_t layer_norm_input_scale_a, - const int32_t layer_norm_input_scale_b, - const int32_t layer_norm_variance_guard, - // Array sizes - const int n_batch, const int n_input, const int n_output, const int n_cell, - const TfLiteFusedActivation activation, + +void Sigmoid(const RuntimeShape& data_shape, int16_t* data); + +void Sigmoid(const RuntimeShape& data_shape, float* data); + +void Tanh(int32_t cell_state_scale_power, const RuntimeShape& input_data_shape, + int16_t* input_data, const RuntimeShape& output_data_shape, + int16_t* output_data); + +void Tanh(int32_t cell_state_scale_power, const RuntimeShape& input_data_shape, + float* input_data, const RuntimeShape& output_data_shape, + float* output_data); + +void Mul(const RuntimeShape& shape, const ArithmeticParams& params, + const int16_t* input1_data, const int16_t* input2_data, + int8_t* output_data); + +void Mul(const RuntimeShape& shape, const ArithmeticParams& params, + const int16_t* input1_data, const int16_t* input2_data, + int16_t* output_data); + +void Mul(const RuntimeShape& shape, const ArithmeticParams& params, + const float* input1_data, const float* input2_data, + float* output_data); + +void FullyConnected(const FullyConnectedParams& params, + const RuntimeShape& input_shape, const int8_t* input_data, + const RuntimeShape& filter_shape, const int8_t* filter_data, + const RuntimeShape& bias_shape, const int32_t* bias_data, + const RuntimeShape& output_shape, int16_t* output_data); + +void FullyConnected(const FullyConnectedParams& params, + const RuntimeShape& input_shape, const int16_t* input_data, + const RuntimeShape& filter_shape, const int8_t* filter_data, + const RuntimeShape& bias_shape, const int64_t* bias_data, + const RuntimeShape& output_shape, int16_t* output_data); + +void FullyConnected(const FullyConnectedParams& params, + const RuntimeShape& input_shape, const float* input_data, + const RuntimeShape& filter_shape, const float* filter_data, + const RuntimeShape& bias_shape, const float* bias_data, + const RuntimeShape& output_shape, float* output_data); + +void AddElementWise(const int16_t* input_1, const int16_t* input_2, int n_batch, + int n_input, int16_t* output); + +void AddElementWise(const float* input_1, const float* input_2, int n_batch, + int n_input, float* output); + +void Clipping(const int v_size, const CellStateInfo& cell_state_info, + int16_t* vector); + +void Clipping(const int v_size, const CellStateInfo& cell_state_info, + float* vector); + +// Manages the slice position (offset), slice length (sliced tensor shape), +// and update rules for input/output/hidden state/cell state tensors at each +// time step. +class LstmStepManager { + public: + LstmStepManager() = delete; + // Does not take any ownership, and all pointers must refer to valid objects + // that outlive the one constructed. 
+ explicit LstmStepManager(const LstmSizeInfo* size_info) + : size_info_(*size_info) {} + + void UpdateTime(); + void UpdateBatch(); + + void ResetTime() { current_time_ = 0; } + RuntimeShape InputShape() const; + RuntimeShape StateShape() const; + + int InputOffset() const { return input_offset_; } + int OutputOffset() const { return output_offset_; } + int HiddenStateOffset() const { return hidden_state_offset_; } + int CellStateOffset() const { return cell_state_offset_; } + + private: + int current_time_ = 0; + int current_batch_ = 0; + int input_offset_ = 0; + int output_offset_ = 0; + int hidden_state_offset_ = 0; + int cell_state_offset_ = 0; + // Sizeinfo is from LstmOpData, which reside in the memory arena + // (guarante to outlast LSTMStepManager, which reside in stack) + const LstmSizeInfo& size_info_; +}; + +// Calculates a single LSTM gate. +// Implements the following formula: +// gate = activate(FC(input) + FC(recurrent)) +// Activation is sigmoid except for the "cell" gate (configurable, usually tanh) +template +void CalculateLstmGate( + const LstmStepManager& step_info, const GateParameters& gate_params, + // Input FC + const TfLiteEvalTensor* input, const TfLiteEvalTensor* input_weight, + const TfLiteEvalTensor* input_bias, + // Recurrent FC + const TfLiteEvalTensor* recurrent, const TfLiteEvalTensor* recurrent_weight, + const TfLiteEvalTensor* recurrent_bias, // Output - int16_t* gate, - // Parameters for performance optimizations + CellType* gate_output, // Scratch arrays - int32_t* scratch5); - -void UpdateLstmCellInteger(int n_batch, int n_cell, int16_t* cell_state, - int32_t cell_state_scale, const int16_t* input_gate, - int16_t* forget_gate, const int16_t* cell_gate, - bool use_cifg, int16_t clip); - -void CalculateLstmOutputInteger8x8_16( - int n_batch, int n_cell, int n_output, int16_t* cell_state, - int32_t cell_state_scale, const int16_t* output_gate, - int32_t hidden_scale_a, int32_t hidden_scale_b, int32_t hidden_zp, - const int8_t* projection_weights, int32_t proj_scale_a, - int32_t proj_scale_b, const int32_t* projection_bias, - int32_t output_state_zp, int8_t quantized_proj_clip, int8_t* output_state, - int16_t* scratch0, int8_t* scratch1, int32_t* scratch2); - -void LstmStepFloat( - const float* input_ptr, const float* input_to_input_weights_ptr, - const float* input_to_forget_weights_ptr, - const float* input_to_cell_weights_ptr, - const float* input_to_output_weights_ptr, const float* aux_input_ptr, - const float* aux_input_to_input_weights_ptr, - const float* aux_input_to_forget_weights_ptr, - const float* aux_input_to_cell_weights_ptr, - const float* aux_input_to_output_weights_ptr, - const float* recurrent_to_input_weights_ptr, - const float* recurrent_to_forget_weights_ptr, - const float* recurrent_to_cell_weights_ptr, - const float* recurrent_to_output_weights_ptr, - const float* cell_to_input_weights_ptr, - const float* cell_to_forget_weights_ptr, - const float* cell_to_output_weights_ptr, - const float* input_layer_norm_coefficients_ptr, - const float* forget_layer_norm_coefficients_ptr, - const float* cell_layer_norm_coefficients_ptr, - const float* output_layer_norm_coefficients_ptr, - const float* input_gate_bias_ptr, const float* forget_gate_bias_ptr, - const float* cell_gate_bias_ptr, const float* output_gate_bias_ptr, - const float* projection_weights_ptr, const float* projection_bias_ptr, - const TfLiteLSTMParams* params, int n_batch, int n_cell, int n_input, - int n_aux_input, int n_output, int output_batch_leading_dim, - float* 
output_state_ptr, float* cell_state_ptr, float* scratch0, - float* scratch1, float* scratch2, float* scratch3, float* output_ptr); - -void LstmStepInteger8x8_16( - const int8_t* input_ptr, const int8_t* input_to_input_weight_ptr, - int32_t effective_input_to_input_scale_a, - int32_t effective_input_to_input_scale_b, - const int8_t* input_to_forget_weight_ptr, - int32_t effective_input_to_forget_scale_a, - int32_t effective_input_to_forget_scale_b, - const int8_t* input_to_cell_weight_ptr, - int32_t effective_input_to_cell_scale_a, - int32_t effective_input_to_cell_scale_b, - const int8_t* input_to_output_weight_ptr, - int32_t effective_input_to_output_scale_a, - int32_t effective_input_to_output_scale_b, - const int8_t* recurrent_to_input_weight_ptr, - int32_t effective_recurrent_to_input_scale_a, - int32_t effective_recurrent_to_input_scale_b, - const int8_t* recurrent_to_forget_weight_ptr, - int32_t effective_recurrent_to_forget_scale_a, - int32_t effective_recurrent_to_forget_scale_b, - const int8_t* recurrent_to_cell_weight_ptr, - int32_t effective_recurrent_to_cell_scale_a, - int32_t effective_recurrent_to_cell_scale_b, - const int8_t* recurrent_to_output_weight_ptr, - int32_t effective_recurrent_to_output_scale_a, - int32_t effective_recurrent_to_output_scale_b, - const int16_t* cell_to_input_weight_ptr, - int32_t effective_cell_to_input_scale_a, - int32_t effective_cell_to_input_scale_b, - const int16_t* cell_to_forget_weight_ptr, - int32_t effective_cell_to_forget_scale_a, - int32_t effective_cell_to_forget_scale_b, - const int16_t* cell_to_output_weight_ptr, - int32_t effective_cell_to_output_scale_a, - int32_t effective_cell_to_output_scale_b, - const int8_t* projection_weight_ptr, int32_t effective_proj_scale_a, - int32_t effective_proj_scale_b, int32_t hidden_zp, - int32_t effective_hidden_scale_a, int32_t effective_hidden_scale_b, - const int16_t* layer_norm_input_weight_ptr, - int32_t layer_norm_input_scale_a, int32_t layer_norm_input_scale_b, - const int16_t* layer_norm_forget_weight_ptr, - int32_t layer_norm_forget_scale_a, int32_t layer_norm_forget_scale_b, - const int16_t* layer_norm_cell_weight_ptr, int32_t layer_norm_cell_scale_a, - int32_t layer_norm_cell_scale_b, - const int16_t* layer_norm_output_weight_ptr, - int32_t layer_norm_output_scale_a, int32_t layer_norm_output_scale_b, - const int32_t* input_gate_bias_ptr, const int32_t* forget_gate_bias_ptr, - const int32_t* cell_gate_bias_ptr, const int32_t* output_gate_bias_ptr, - int16_t quantized_cell_clip, int8_t quantized_proj_clip, - int32_t cell_state_scale, int32_t input_variance_guard, - int32_t forget_variance_guard, int32_t cell_variance_guard, - int32_t output_variance_guard, - const int32_t* input_to_forget_effective_bias, - const int32_t* recurrent_to_forget_effective_bias, - const int32_t* input_to_cell_effective_bias, - const int32_t* recurrent_to_cell_effective_bias, - const int32_t* input_to_output_effective_bias, - const int32_t* recurrent_to_output_effective_bias, - const int32_t* input_to_input_effective_bias, - const int32_t* recurrent_to_input_effective_bias, - const int32_t* projection_effective_bias, int n_batch, int n_cell, - int n_input, int n_output, int8_t* output_state_ptr, - int32_t output_state_zp, int16_t* cell_state_ptr, int8_t* output_ptr, - int16_t* scratch0, int16_t* scratch1, int16_t* scratch2, int16_t* scratch3, - int8_t* scratch4, int32_t* scratch5); -} // namespace lstm_internal + CellType* fc_output_buffer, const TfLiteFusedActivation activation) { + const auto gate_output_shape = 
step_info.StateShape(); + // Check offset validity to avoid memory overflow + TFLITE_DCHECK_LE(step_info.InputOffset() + step_info.InputShape().FlatSize(), + tflite::micro::GetTensorShape(input).FlatSize()); + TFLITE_DCHECK_LE( + step_info.HiddenStateOffset() + step_info.StateShape().FlatSize(), + tflite::micro::GetTensorShape(recurrent).FlatSize()); -// Pamameters for integer LSTM. -// Consider split this into two Integer Parameters if more fields are added. -struct IntegerLstmParameter { - int32_t effective_input_to_input_scale_a = 0; - int32_t effective_input_to_input_scale_b = 0; - int32_t effective_recurrent_to_input_scale_a = 0; - int32_t effective_recurrent_to_input_scale_b = 0; - int32_t effective_cell_to_input_scale_a = 0; - int32_t effective_cell_to_input_scale_b = 0; - int32_t effective_input_to_forget_scale_a = 0; - int32_t effective_input_to_forget_scale_b = 0; - int32_t effective_recurrent_to_forget_scale_a = 0; - int32_t effective_recurrent_to_forget_scale_b = 0; - int32_t effective_cell_to_forget_scale_a = 0; - int32_t effective_cell_to_forget_scale_b = 0; - int32_t effective_input_to_cell_scale_a = 0; - int32_t effective_input_to_cell_scale_b = 0; - int32_t effective_recurrent_to_cell_scale_a = 0; - int32_t effective_recurrent_to_cell_scale_b = 0; - int32_t effective_input_to_output_scale_a = 0; - int32_t effective_input_to_output_scale_b = 0; - int32_t effective_recurrent_to_output_scale_a = 0; - int32_t effective_recurrent_to_output_scale_b = 0; - int32_t effective_cell_to_output_scale_a = 0; - int32_t effective_cell_to_output_scale_b = 0; - int32_t effective_proj_scale_a = 0; - int32_t effective_proj_scale_b = 0; - int32_t effective_hidden_scale_a = 0; - int32_t effective_hidden_scale_b = 0; - int32_t layer_norm_input_scale_a = 0; - int32_t layer_norm_input_scale_b = 0; - int32_t layer_norm_forget_scale_a = 0; - int32_t layer_norm_forget_scale_b = 0; - int32_t layer_norm_cell_scale_a = 0; - int32_t layer_norm_cell_scale_b = 0; - int32_t layer_norm_output_scale_a = 0; - int32_t layer_norm_output_scale_b = 0; - // Quantized clip value for cell and projection. Zero value means no - // clipping. - int16_t quantized_cell_clip = 0; - int8_t quantized_proj_clip = 0; - int32_t hidden_zp = 0; - int32_t cell_scale = 0; - - int32_t input_variance_guard = 0; - int32_t forget_variance_guard = 0; - int32_t cell_variance_guard = 0; - int32_t output_variance_guard = 0; - - // Pre-calculate bias + zero_point * weight. - int32_t* input_to_forget_effective_bias = nullptr; - int32_t* recurrent_to_forget_effective_bias = nullptr; - int32_t* input_to_cell_effective_bias = nullptr; - int32_t* recurrent_to_cell_effective_bias = nullptr; - int32_t* input_to_output_effective_bias = nullptr; - int32_t* recurrent_to_output_effective_bias = nullptr; - int32_t* input_to_input_effective_bias = nullptr; - int32_t* recurrent_to_input_effective_bias = nullptr; - int32_t* projection_effective_bias = nullptr; - - // Scale and zero point for intermediate tensors. - // Used only in the 8x8_8 case. 
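[Editor's note, illustrative only, not part of the patch] Taken together, CalculateLstmGate here and the UpdateLstmCell/UpdateLstmHidden helpers further below implement the standard non-peephole LSTM step; in floating-point form:

\begin{aligned}
f_t &= \sigma(W_f x_t + U_f h_{t-1} + b_f), & i_t &= \sigma(W_i x_t + U_i h_{t-1} + b_i),\\
g_t &= \tanh(W_g x_t + U_g h_{t-1} + b_g), & o_t &= \sigma(W_o x_t + U_o h_{t-1} + b_o),\\
c_t &= f_t \odot c_{t-1} + i_t \odot g_t, & h_t &= o_t \odot \tanh(c_t).
\end{aligned}

Each gate is computed as FullyConnected(x_t) + FullyConnected(h_{t-1}) followed by the activation; the bias is folded into the input-side FC, which is why the recurrent FC is passed a null bias.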
- int32_t intermediate_scale_a[8] = {}; - int32_t intermediate_scale_b[8] = {}; - int32_t intermediate_zp[12] = {}; -}; + // Input FC + FullyConnected(gate_params.input_fc_params, step_info.InputShape(), + tflite::micro::GetTensorData(input) + + step_info.InputOffset(), + micro::GetTensorShape(input_weight), + tflite::micro::GetTensorData(input_weight), + tflite::micro::GetTensorShape(input_bias), + tflite::micro::GetOptionalTensorData(input_bias), + gate_output_shape, gate_output); + + // Recurrent FC + FullyConnected(gate_params.recurrent_fc_params, step_info.StateShape(), + tflite::micro::GetTensorData(recurrent) + + step_info.HiddenStateOffset(), + tflite::micro::GetTensorShape(recurrent_weight), + tflite::micro::GetTensorData(recurrent_weight), + tflite::micro::GetTensorShape(recurrent_bias), + tflite::micro::GetOptionalTensorData(recurrent_bias), + gate_output_shape, fc_output_buffer); + + AddElementWise(gate_output, fc_output_buffer, + /*n_batch=*/gate_output_shape.DimsData()[0], + /*n_state=*/gate_output_shape.DimsData()[1], gate_output); + // Apply activation + switch (activation) { + case kTfLiteActSigmoid: + Sigmoid(gate_output_shape, gate_output); + break; + case kTfLiteActTanh: { + // Set the scale power to -12 to avoid shift + Tanh(/*cell_state_scale_power=*/-12, gate_output_shape, gate_output, + gate_output_shape, gate_output); + } break; + default: + // Only Sigmoid or Tanh is used. + TFLITE_ASSERT_FALSE; + } +} -TfLiteStatus EvalFloatLstm( - const TfLiteEvalTensor* input, - const TfLiteEvalTensor* input_to_input_weights, - const TfLiteEvalTensor* input_to_forget_weights, - const TfLiteEvalTensor* input_to_cell_weights, - const TfLiteEvalTensor* input_to_output_weights, - const TfLiteEvalTensor* recurrent_to_input_weights, - const TfLiteEvalTensor* recurrent_to_forget_weights, - const TfLiteEvalTensor* recurrent_to_cell_weights, - const TfLiteEvalTensor* recurrent_to_output_weights, - const TfLiteEvalTensor* cell_to_input_weights, - const TfLiteEvalTensor* cell_to_forget_weights, - const TfLiteEvalTensor* cell_to_output_weights, - const TfLiteEvalTensor* input_layer_norm_coefficients, - const TfLiteEvalTensor* forget_layer_norm_coefficients, - const TfLiteEvalTensor* cell_layer_norm_coefficients, - const TfLiteEvalTensor* output_layer_norm_coefficients, - const TfLiteEvalTensor* aux_input, - const TfLiteEvalTensor* aux_input_to_input_weights, - const TfLiteEvalTensor* aux_input_to_forget_weights, - const TfLiteEvalTensor* aux_input_to_cell_weights, - const TfLiteEvalTensor* aux_input_to_output_weights, - const TfLiteEvalTensor* input_gate_bias, - const TfLiteEvalTensor* forget_gate_bias, - const TfLiteEvalTensor* cell_gate_bias, - const TfLiteEvalTensor* output_gate_bias, - const TfLiteEvalTensor* projection_weights, - const TfLiteEvalTensor* projection_bias, const TfLiteLSTMParams* params, - bool forward_sequence, bool time_major, int output_offset, - float* scratch_buffer, TfLiteEvalTensor* output_state, - TfLiteEvalTensor* cell_state, TfLiteEvalTensor* output); - -TfLiteStatus EvalInteger8x8_16Lstm( - const TfLiteEvalTensor* input, - const TfLiteEvalTensor* input_to_input_weights, - const TfLiteEvalTensor* input_to_forget_weights, - const TfLiteEvalTensor* input_to_cell_weights, - const TfLiteEvalTensor* input_to_output_weights, - const TfLiteEvalTensor* recurrent_to_input_weights, - const TfLiteEvalTensor* recurrent_to_forget_weights, - const TfLiteEvalTensor* recurrent_to_cell_weights, - const TfLiteEvalTensor* recurrent_to_output_weights, - const TfLiteEvalTensor* 
cell_to_input_weights, - const TfLiteEvalTensor* cell_to_forget_weights, - const TfLiteEvalTensor* cell_to_output_weights, - const TfLiteEvalTensor* input_layer_norm_coefficients, - const TfLiteEvalTensor* forget_layer_norm_coefficients, - const TfLiteEvalTensor* cell_layer_norm_coefficients, - const TfLiteEvalTensor* output_layer_norm_coefficients, - const TfLiteEvalTensor* input_gate_bias, - const TfLiteEvalTensor* forget_gate_bias, - const TfLiteEvalTensor* cell_gate_bias, - const TfLiteEvalTensor* output_gate_bias, - const TfLiteEvalTensor* projection_weights, - const TfLiteEvalTensor* projection_bias, const TfLiteLSTMParams* params, - bool forward_sequence, bool time_major, - const IntegerLstmParameter* integer_lstm_param, int32_t output_state_zp, - TfLiteEvalTensor* output_state, TfLiteEvalTensor* cell_state, - TfLiteEvalTensor* output, int16_t* scratch0, int16_t* scratch1, - int16_t* scratch2, int16_t* scratch3, int8_t* scratch4, int32_t* scratch5); +// Update the cell state using the output from the forget gate, input gate, and +// cell gate Formula: updated_cell_state = forget_gate_output*cell_state + +// input_gate_output * cell_gate_output, where * denotes element wise +// multiplication +template +void UpdateLstmCell(const LstmStepManager& step_info, + TfLiteEvalTensor* cell_state, + // Gate outputs + CellType* forget_gate_output, + const CellType* input_gate_output, + const CellType* cell_gate_output, + // Mul parameters + const ArithmeticParams& forget_cell_mul_params, + const ArithmeticParams& input_mul_params, + const CellStateInfo& cell_state_info, CellType* buffer) { + // Check offset validity to avoid memory overflow + TFLITE_DCHECK_LE( + step_info.CellStateOffset() + step_info.StateShape().FlatSize(), + tflite::micro::GetTensorShape(cell_state).FlatSize()); + auto cell_state_shape = step_info.StateShape(); + // Forget Gate x Cell State + Mul(cell_state_shape, forget_cell_mul_params, forget_gate_output, + tflite::micro::GetTensorData(cell_state) + + step_info.CellStateOffset(), + tflite::micro::GetTensorData(cell_state) + + step_info.CellStateOffset()); + // Input Gate x Cell Gate + Mul(cell_state_shape, input_mul_params, input_gate_output, cell_gate_output, + buffer); + + // Update the cell state + AddElementWise(tflite::micro::GetTensorData(cell_state) + + step_info.CellStateOffset(), + buffer, + /*n_batch=*/cell_state_shape.DimsData()[0], + /*n_state=*/cell_state_shape.DimsData()[1], + tflite::micro::GetTensorData(cell_state) + + step_info.CellStateOffset()); + + if (cell_state_info.cell_clip > 0) { + Clipping(cell_state_shape.FlatSize(), cell_state_info, + tflite::micro::GetTensorData(cell_state) + + step_info.CellStateOffset()); + } +} + +// Update the hidden state of the LSTM kernel using the following formula: +// updated_hidden_state = Tanh(updated_cell_state) * output_gate_output, * means +// element wise multiplication +template +void UpdateLstmHidden(const LstmStepManager& step_info, + TfLiteEvalTensor* cell_state, + TfLiteEvalTensor* hidden_state, + const CellType* output_gate_output, + const ArithmeticParams& mul_params, + int32_t cell_state_scale_power, CellType* buffer) { + // Check offset validity to avoid memory overflow + TFLITE_DCHECK_LE( + step_info.CellStateOffset() + step_info.StateShape().FlatSize(), + tflite::micro::GetTensorShape(cell_state).FlatSize()); + TFLITE_DCHECK_LE( + step_info.HiddenStateOffset() + step_info.StateShape().FlatSize(), + tflite::micro::GetTensorShape(hidden_state).FlatSize()); + + auto cell_state_shape = 
step_info.StateShape(); + CellType* cell_state_data = + tflite::micro::GetTensorData(cell_state) + + step_info.CellStateOffset(); + // Tanh(cell_state) + Tanh(cell_state_scale_power, cell_state_shape, cell_state_data, + cell_state_shape, buffer); + // Update the hidden state + Mul(cell_state_shape, mul_params, buffer, output_gate_output, + tflite::micro::GetTensorData(hidden_state) + + step_info.HiddenStateOffset()); +} + +template +void LstmStep(const LstmStepManager& step_info, const OpDataLSTM& op_data, + LSTMKernelContents& kernel_content, + const LSTMBuffers& buffers) { + /*Step1: Calculate gate outputs to prepare cell state update*/ + CellType* gate_internal_buffer = buffers.buffer3; + CellType* forget_gate_output = buffers.buffer0; + CalculateLstmGate( + step_info, op_data.forget_gate_parameters, + // Input FC + kernel_content.GetInternalTensor(tflite::kLstmInputTensor), + kernel_content.GetInternalTensor(tflite::kLstmInputToForgetWeightsTensor), + kernel_content.GetInternalTensor(tflite::kLstmForgetGateBiasTensor), + // Recurrent FC + kernel_content.HiddenStateTensor(), + kernel_content.GetInternalTensor( + tflite::kLstmRecurrentToForgetWeightsTensor), + /*recurrent_bias*/ nullptr, + // Output + forget_gate_output, + // Scratch arrays + gate_internal_buffer, kTfLiteActSigmoid); + + // Input Gate calculation; + CellType* input_gate_output = buffers.buffer1; + CalculateLstmGate( + step_info, op_data.input_gate_parameters, + // Input FC + kernel_content.GetInternalTensor(tflite::kLstmInputTensor), + kernel_content.GetInternalTensor(tflite::kLstmInputToInputWeightsTensor), + kernel_content.GetInternalTensor(tflite::kLstmInputGateBiasTensor), + // Recurrent FC + kernel_content.HiddenStateTensor(), + kernel_content.GetInternalTensor( + tflite::kLstmRecurrentToInputWeightsTensor), + /*recurrent_bias*/ nullptr, + // Output + input_gate_output, + // Scratch arrays + gate_internal_buffer, kTfLiteActSigmoid); + + // Cell Gate calculation + CellType* cell_gate_output = buffers.buffer2; + CalculateLstmGate( + step_info, op_data.cell_gate_parameters, + // Input FC + kernel_content.GetInternalTensor(tflite::kLstmInputTensor), + kernel_content.GetInternalTensor(tflite::kLstmInputToCellWeightsTensor), + kernel_content.GetInternalTensor(tflite::kLstmCellGateBiasTensor), + // Recurrent FC + kernel_content.HiddenStateTensor(), + kernel_content.GetInternalTensor( + tflite::kLstmRecurrentToCellWeightsTensor), + /*recurrent_bias*/ nullptr, + // Output + cell_gate_output, + // Scratch arrays + gate_internal_buffer, op_data.cell_gate_nonlinear_type); + + /*Step2: update the cell state */ + const InterGateParameters& inter_gate_params = op_data.inter_gate_parameters; + CellType* updated_input_buffer = buffers.buffer1; // reuse buffer + + UpdateLstmCell(step_info, kernel_content.CellStateTensor(), + forget_gate_output, input_gate_output, + cell_gate_output, + inter_gate_params.forget_cell_mul_params, + inter_gate_params.input_mul_params, + op_data.cell_state_info, updated_input_buffer); + + /*Step3: update the hidden state */ + CellType* output_gate_output = buffers.buffer1; // reuse buffer + CalculateLstmGate( + step_info, op_data.output_gate_parameters, + // Input FC + kernel_content.GetInternalTensor(tflite::kLstmInputTensor), + kernel_content.GetInternalTensor(tflite::kLstmInputToOutputWeightsTensor), + kernel_content.GetInternalTensor(tflite::kLstmOutputGateBiasTensor), + // Recurrent FC + kernel_content.HiddenStateTensor(), + kernel_content.GetInternalTensor( + 
tflite::kLstmRecurrentToOutputWeightsTensor), + /*recurrent_bias*/ nullptr, + // Output + output_gate_output, + // Scratch arrays + gate_internal_buffer, kTfLiteActSigmoid); + + CellType* tanh_activated_cell_buffer = buffers.buffer0; // reuse buffer + tflite::lstm_internal::UpdateLstmHidden( + step_info, kernel_content.CellStateTensor(), + kernel_content.HiddenStateTensor(), output_gate_output, + inter_gate_params.output_mul_params, + op_data.cell_state_info.cell_state_scale_power, + tanh_activated_cell_buffer); + + /*Step4: copy the updated hidden state to output*/ + // Check offset validity to avoid memory overflow + TFLITE_DCHECK_LE( + step_info.OutputOffset() + step_info.StateShape().FlatSize(), + tflite::micro::GetTensorShape(kernel_content.output_tensor).FlatSize()); + // record the output (from the updated hidden state) + ActivationType* output_ptr = tflite::micro::GetTensorData( + kernel_content.output_tensor); + const auto* hidden_state = kernel_content.HiddenStateTensor(); + std::memcpy(output_ptr + step_info.OutputOffset(), + tflite::micro::GetTensorData(hidden_state) + + step_info.HiddenStateOffset(), + step_info.StateShape().FlatSize() * sizeof(ActivationType)); +} + +} // namespace lstm_internal + +// Evaluate the LSTM kernel with (potentially) multiple time steps and multi-batch input +template +TfLiteStatus EvalLstm(const OpDataLSTM& op_data, + LSTMKernelContents& kernel_content, + const LSTMBuffers& buffers) { + lstm_internal::LstmStepManager step_info(&op_data.size_info); + const auto& size_info = op_data.size_info; + // time is the first dimension, enabling batch computation + if (size_info.time_major) { + for (int t = 0; t < size_info.time_steps; t++) { + lstm_internal::LstmStep( + step_info, op_data, kernel_content, buffers); + // prepare for the next time step + step_info.UpdateTime(); + } + } else { + // batch first: cannot batch across time steps, so run single-batch inference + for (int b = 0; b < size_info.batch_size; b++) { + for (int t = 0; t < size_info.time_steps; t++) { + lstm_internal::LstmStep( + step_info, op_data, kernel_content, buffers); + // prepare for the next time step + step_info.UpdateTime(); + } + // prepare for the next batch + step_info.UpdateBatch(); + step_info.ResetTime(); + } + } + return kTfLiteOk; +} } // namespace tflite -#endif // TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_H_ + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_16ACT_H_ diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval_common.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval_common.cc new file mode 100644 index 000000000..22a6d4600 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval_common.cc @@ -0,0 +1,326 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/lite/kernels/internal/quantization_util.h" +#include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/fully_connected.h" +#include "tensorflow/lite/micro/kernels/lstm_eval.h" + +namespace tflite { + +// Deduce the size information (Batch (B), Time Steps (T), Input dimension (I), +// State dimension (S)) that defines the LSTM using the input and hidden state +// tensor +LstmSizeInfo CreateLstmSizeInfo( + const bool time_major, const TfLiteIntArray* input_tensor_shape, + const TfLiteIntArray* hidden_state_tensor_shape) { + LstmSizeInfo size_info; + size_info.time_major = time_major; + size_info.batch_size = + time_major ? input_tensor_shape->data[1] : input_tensor_shape->data[0]; + size_info.time_steps = + time_major ? input_tensor_shape->data[0] : input_tensor_shape->data[1]; + size_info.input_dimension = input_tensor_shape->data[2]; + size_info.state_dimension = hidden_state_tensor_shape->data[1]; + return size_info; +} + +TfLiteStatus ValidateWeightTensorSize(TfLiteContext* context, + const TfLiteTensor* tensor, int dim1_size, + int dim2_size) { + TF_LITE_ENSURE_EQ(context, tensor->dims->size, 2); + TF_LITE_ENSURE_EQ(context, tensor->dims->data[0], dim1_size); + TF_LITE_ENSURE_EQ(context, tensor->dims->data[1], dim2_size); + return kTfLiteOk; +} + +TfLiteStatus ValidateBiasTensorSize(TfLiteContext* context, + const TfLiteTensor* tensor, int size) { + TF_LITE_ENSURE_EQ(context, tensor->dims->size, 1); + TF_LITE_ENSURE_EQ(context, tensor->dims->data[0], size); + return kTfLiteOk; +} + +// Go through every tensors and make sure their shape match the kernel +// configuration +TfLiteStatus ValidateTensorSize(TfLiteContext* context, + const LstmTensors& tensors, + const LstmSizeInfo& size_info) { + // Input FC weights + for (size_t i = 1; i < 5; i++) { + TF_LITE_ENSURE_OK( + context, ValidateWeightTensorSize(context, tensors.GetInternalTensor(i), + size_info.state_dimension, + size_info.input_dimension)); + } + // Recurrent FC weights + for (size_t i = 5; i < 9; i++) { + TF_LITE_ENSURE_OK( + context, ValidateWeightTensorSize(context, tensors.GetInternalTensor(i), + size_info.state_dimension, + size_info.state_dimension)); + } + // Biases + for (size_t i = 12; i < 16; i++) { + TF_LITE_ENSURE_OK( + context, ValidateBiasTensorSize(context, tensors.GetInternalTensor(i), + size_info.state_dimension)); + } + + // Check the shape of input state tensors. + // These tensor may be 1D or 2D. It's fine as long as the total size is + // correct. 
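[Editor's note, illustrative only, not part of the patch] A short sketch of how CreateLstmSizeInfo() above deduces the {batch, time, input, state} sizes from the tensor shapes. The int-array cast mirrors the IntArrayFromInts pattern used by the micro test helpers, and the shapes are made up for illustration.

int input_dims_data[] = {3, 2, 10, 8};   // rank 3, batch-major: {B=2, T=10, I=8}
int state_dims_data[] = {2, 2, 16};      // rank 2: {B=2, S=16}
const TfLiteIntArray* input_dims =
    reinterpret_cast<const TfLiteIntArray*>(input_dims_data);
const TfLiteIntArray* state_dims =
    reinterpret_cast<const TfLiteIntArray*>(state_dims_data);
tflite::LstmSizeInfo info =
    tflite::CreateLstmSizeInfo(/*time_major=*/false, input_dims, state_dims);
// info.batch_size == 2, info.time_steps == 10,
// info.input_dimension == 8, info.state_dimension == 16
// With time_major == true, the first two input dimensions swap roles, so an
// input shape of {10, 2, 8} produces the same size info.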
+ TF_LITE_ENSURE_EQ(context, NumElements(tensors.HiddenStateTensor()), + size_info.batch_size * size_info.state_dimension); + TF_LITE_ENSURE_EQ(context, NumElements(tensors.CellStateTensor()), + size_info.batch_size * size_info.state_dimension); + + // Check the shape of output tensor against that of input tensor + TF_LITE_ENSURE_EQ(context, tensors.OutputTensor()->dims->size, 3); + TF_LITE_ENSURE_EQ(context, + tensors.GetInternalTensor(kLstmInputTensor)->dims->data[0], + tensors.OutputTensor()->dims->data[0]); + TF_LITE_ENSURE_EQ(context, + tensors.GetInternalTensor(kLstmInputTensor)->dims->data[1], + tensors.OutputTensor()->dims->data[1]); + TF_LITE_ENSURE_EQ(context, tensors.OutputTensor()->dims->data[2], + size_info.state_dimension); + return kTfLiteOk; +} + +// Wrapper function to create gate parameters for the four internal LSTM gates +TfLiteStatus CreateGateParams( + TfLiteContext* context, + /*Input tensors*/ + const TfLiteTensor* input, const TfLiteTensor* input_weight, + const TfLiteTensor* input_bias, + /*Hidden state tensors*/ + const TfLiteTensor* hidden_state, const TfLiteTensor* hidden_state_weight, + const TfLiteTensor* hidden_state_bias, + /*Scale of the fc output (input to non-linear activation)*/ + const float nonlinear_activation_input_scale, const TfLiteType cell_type, + tflite::GateParameters& gate_params) { + // A temp tflite tensor to represent the output of the fc operation. Only the data + // type and quantization parameters are set since it is only used for + // parameter calculations + TfLiteTensor fc_output_temp; + fc_output_temp.type = cell_type; + fc_output_temp.params.scale = nonlinear_activation_input_scale; + fc_output_temp.params.zero_point = 0; // symmetrically quantized + + // A temp fc opdata to reuse the helper function on creating fc parameters + tflite::OpDataFullyConnected fc_data_temp; + // TODO(b/265853320): due to the lack of precision for the float scale, + // scale_diff / output_scale <= 0.02 (potentially requires 1e-8 precision) can + // not be satisfied for the bias. Here we rely on the correctness of the + // conversion process (set input_bias=nullptr to avoid checking) for + // tensor scales + TF_LITE_ENSURE_STATUS(CalculateOpDataFullyConnected( + context, kTfLiteActNone, input->type, input, input_weight, + /*input_bias=*/nullptr, &fc_output_temp, &fc_data_temp)); + gate_params.input_fc_params = FullyConnectedParamsQuantized(fc_data_temp); + double real_multiplier = 0.0; + GetQuantizedConvolutionMultipler(context, input, input_weight, nullptr, + &fc_output_temp, &real_multiplier); + + TF_LITE_ENSURE_STATUS(CalculateOpDataFullyConnected( + context, kTfLiteActNone, hidden_state->type, hidden_state, + hidden_state_weight, hidden_state_bias, &fc_output_temp, &fc_data_temp)); + gate_params.recurrent_fc_params = FullyConnectedParamsQuantized(fc_data_temp); + return kTfLiteOk; +} + +// Create parameters for element wise multiplication that happens in a) cell +// state update ; b) hidden state update +// Note that all the gate outputs are symmetrically quantized, so only scales +// are required for the inputs. However, during the hidden state update phase, the +// output is the updated hidden state, which is asymmetrically quantized. 
Thus +// output may require zero point +tflite::ArithmeticParams CreateInterGateMulParams(const float input1_scale, + const float input2_scale, + const float output_scale, + const TfLiteType output_type, + const int output_zp) { + tflite::ArithmeticParams op_params = {}; + if (output_type == kTfLiteInt16) { + op_params.quantized_activation_min = std::numeric_limits::min(); + op_params.quantized_activation_max = std::numeric_limits::max(); + } else if (output_type == kTfLiteInt8) { + op_params.quantized_activation_min = std::numeric_limits::min(); + op_params.quantized_activation_max = std::numeric_limits::max(); + } + + op_params.input1_offset = 0; // symmetric + op_params.input2_offset = 0; // symmetric + op_params.output_offset = output_zp; + + const double input_product_scale = + static_cast(input1_scale) * static_cast(input2_scale); + double effective_scale = + input_product_scale / static_cast(output_scale); + + QuantizeMultiplier(effective_scale, &op_params.output_multiplier, + &op_params.output_shift); + return op_params; +} + +// Create the additional information about the cell state, which include: +// cell_state_scale_power: used in integer nonlinear function (e.g., tanh) +// quantized_cell_clip: quantized cell clip range +CellStateInfo CreateLstmCellStateInfo(const float cell_state_scale, + const float cell_clip) { + CellStateInfo cell_state_info; + // cell_state_scale_power: 2^-cell_state_scale_power = cell state scale + int buffer; + tflite::CheckedLog2(cell_state_scale, &buffer); + cell_state_info.cell_state_scale_power = buffer; + // Cell state specifics + cell_state_info.cell_clip = cell_clip; + cell_state_info.quantized_cell_clip = static_cast( + std::min(std::max(static_cast(cell_clip) / + static_cast(cell_state_scale), + -32768.0), + 32767.0)); + + return cell_state_info; +} + +CellStateInfo CreateLstmCellStateInfoFloat(const float cell_clip) { + CellStateInfo cell_state_info; + cell_state_info.cell_clip = cell_clip; + cell_state_info.cell_state_scale_power = 0; // no quantization + cell_state_info.quantized_cell_clip = 0; // no quantization + return cell_state_info; +} + +tflite::FullyConnectedParams CreateFCParamsFloat() { + FullyConnectedParams op_params; + CalculateActivationRange(kTfLiteActNone, &op_params.float_activation_min, + &op_params.float_activation_max); + return op_params; +} + +tflite::GateParameters CreateGateParamsFloat() { + tflite::GateParameters gate_params = {}; + gate_params.input_fc_params = CreateFCParamsFloat(); + gate_params.recurrent_fc_params = CreateFCParamsFloat(); + return gate_params; +} + +tflite::ArithmeticParams CreateInterGateMulParamsFloat() { + tflite::ArithmeticParams op_params = {}; + CalculateActivationRange(kTfLiteActNone, &op_params.float_activation_min, + &op_params.float_activation_max); + return op_params; +} + +TfLiteStatus PrepareGateParametersFloat(TfLiteContext* context, + const LstmTensors& lstm_tensors, + OpDataLSTM* op_data_lstm) { + // Gate Parameters + op_data_lstm->forget_gate_parameters = CreateGateParamsFloat(); + op_data_lstm->input_gate_parameters = CreateGateParamsFloat(); + op_data_lstm->cell_gate_parameters = CreateGateParamsFloat(); + op_data_lstm->output_gate_parameters = CreateGateParamsFloat(); + // Inter gate multiplication parameters + op_data_lstm->inter_gate_parameters.forget_cell_mul_params = + CreateInterGateMulParamsFloat(); + op_data_lstm->inter_gate_parameters.input_mul_params = + CreateInterGateMulParamsFloat(); + op_data_lstm->inter_gate_parameters.output_mul_params = + 
CreateInterGateMulParamsFloat(); + return kTfLiteOk; +} + +TfLiteStatus PrepareGateParametersInteger(TfLiteContext* context, + const LstmTensors& lstm_tensors, + OpDataLSTM* op_data_lstm) { + float nonlinear_input_scale = 0.00024414062; // 2^-12 Q3.12 -> Q0.15 + TF_LITE_ENSURE_OK( + context, + CreateGateParams( + context, lstm_tensors.GetInternalTensor(kLstmInputTensor), + lstm_tensors.GetInternalTensor(kLstmInputToForgetWeightsTensor), + lstm_tensors.GetInternalTensor(kLstmForgetGateBiasTensor), + lstm_tensors.GetInternalTensor(kLstmOutputStateTensor), + lstm_tensors.GetInternalTensor(kLstmRecurrentToForgetWeightsTensor), + /*hidden_state_bias=*/nullptr, nonlinear_input_scale, kTfLiteInt16, + op_data_lstm->forget_gate_parameters)); + TF_LITE_ENSURE_OK( + context, + CreateGateParams( + context, lstm_tensors.GetInternalTensor(kLstmInputTensor), + lstm_tensors.GetInternalTensor(kLstmInputToInputWeightsTensor), + lstm_tensors.GetInternalTensor(kLstmInputGateBiasTensor), + lstm_tensors.GetInternalTensor(kLstmOutputStateTensor), + lstm_tensors.GetInternalTensor(kLstmRecurrentToInputWeightsTensor), + /*hidden_state_bias=*/nullptr, nonlinear_input_scale, kTfLiteInt16, + op_data_lstm->input_gate_parameters)); + TF_LITE_ENSURE_OK( + context, + CreateGateParams( + context, lstm_tensors.GetInternalTensor(kLstmInputTensor), + lstm_tensors.GetInternalTensor(kLstmInputToCellWeightsTensor), + lstm_tensors.GetInternalTensor(kLstmCellGateBiasTensor), + lstm_tensors.GetInternalTensor(kLstmOutputStateTensor), + lstm_tensors.GetInternalTensor(kLstmRecurrentToCellWeightsTensor), + /*hidden_state_bias=*/nullptr, nonlinear_input_scale, kTfLiteInt16, + op_data_lstm->cell_gate_parameters)); + TF_LITE_ENSURE_OK( + context, + CreateGateParams( + context, lstm_tensors.GetInternalTensor(kLstmInputTensor), + lstm_tensors.GetInternalTensor(kLstmInputToOutputWeightsTensor), + lstm_tensors.GetInternalTensor(kLstmOutputGateBiasTensor), + lstm_tensors.GetInternalTensor(kLstmOutputStateTensor), + lstm_tensors.GetInternalTensor(kLstmRecurrentToOutputWeightsTensor), + /*hidden_state_bias=*/nullptr, nonlinear_input_scale, kTfLiteInt16, + op_data_lstm->output_gate_parameters)); + + // Inter gate multiplication parameters + float nonlinear_output_scale = 0.00003051757; // 2^-15 Q3.12 -> Q0.15 + float cell_state_scale = lstm_tensors.CellStateTensor()->params.scale; + // forget gate output (nonlinear output) x cell state -> cell state + op_data_lstm->inter_gate_parameters.forget_cell_mul_params = + CreateInterGateMulParams(nonlinear_output_scale, cell_state_scale, + cell_state_scale, kTfLiteInt16); + // input gate output x cell gate output -> cell state + op_data_lstm->inter_gate_parameters.input_mul_params = + CreateInterGateMulParams(nonlinear_output_scale, nonlinear_output_scale, + cell_state_scale, kTfLiteInt16); + // tanh output x output gate output -> hidden state (potentially asymmetric) + op_data_lstm->inter_gate_parameters.output_mul_params = + CreateInterGateMulParams( + nonlinear_output_scale, nonlinear_output_scale, + lstm_tensors.HiddenStateTensor()->params.scale, + lstm_tensors.HiddenStateTensor()->type, + lstm_tensors.HiddenStateTensor()->params.zero_point); + return kTfLiteOk; +} + +LSTMKernelContents CreateLSTMKernelContent(TfLiteContext* context, + TfLiteNode* node) { + LSTMKernelContents kernel_content; + // Point to correct tensors + for (size_t i = 0; i < 24; i++) { + kernel_content.internal_tensors[i] = + tflite::micro::GetMutableEvalInput(context, node, i); + } + // Output tensor + 
kernel_content.output_tensor = tflite::micro::GetEvalOutput(context, node, 0);
+  return kernel_content;
+}
+
+}  // namespace tflite
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval_test.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval_test.h
new file mode 100644
index 000000000..aee12cf39
--- /dev/null
+++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval_test.h
@@ -0,0 +1,817 @@
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_TEST_H_
+#define TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_TEST_H_
+
+#include <algorithm>
+#include <limits>
+
+#include "tensorflow/lite/micro/kernels/lstm_eval.h"
+#include "tensorflow/lite/micro/kernels/testdata/lstm_test_data.h"
+#include "tensorflow/lite/micro/test_helpers.h"
+#include "tensorflow/lite/micro/testing/micro_test.h"
+
+namespace tflite {
+namespace testing {
+
+/*Helper Functions (mainly about mimicking the kernel preparation)*/
+
+// Create fully connected parameters using quantization settings of input and
+// weight tensors.
+// Since TfLiteContext is not available during the kernel test, here we mimic
+// (put into stack memory) CalculateOpDataFullyConnected in
+// tensorflow/lite/micro/kernels/fully_connected_common.cc
+template <typename CellType>
+tflite::FullyConnectedParams CreateFCParams(
+    const TensorQuantizationParameters& input_quant_params,
+    const TensorQuantizationParameters& weight_quant_params,
+    const float nonlinear_activation_input_scale) {
+  OpDataFullyConnected data;
+  const double input_product_scale =
+      input_quant_params.scale * weight_quant_params.scale;
+  double effective_scale =
+      input_product_scale /
+      static_cast<double>(nonlinear_activation_input_scale);
+
+  QuantizeMultiplier(effective_scale, &data.output_multiplier,
+                     &data.output_shift);
+
+  data.input_zero_point = input_quant_params.zero_point;
+
+  data.filter_zero_point = 0;  // symmetrically quantized
+  data.output_zero_point = 0;  // symmetrically quantized
+
+  data.output_activation_min = std::numeric_limits<CellType>::min();
+  data.output_activation_max = std::numeric_limits<CellType>::max();
+
+  return tflite::FullyConnectedParamsQuantized(data);
+}
+
+inline tflite::FullyConnectedParams CreateFCParamsFloat() {
+  FullyConnectedParams op_params;
+  CalculateActivationRange(kTfLiteActNone, &op_params.float_activation_min,
+                           &op_params.float_activation_max);
+  return op_params;
+}
+
+// Wrapper function to create gate parameters for the four internal LSTM gates
+template <typename CellType>
+tflite::GateParameters CreateGateParams(
+    const TensorQuantizationParameters& input_quant_params,
+    const TensorQuantizationParameters& hidden_state_quant_params,
+    const GateQuantizationParameters& gate_quantization_settings,
+    const float nonlinear_activation_input_scale) {
+  tflite::GateParameters gate_params = {};
+  gate_params.input_fc_params = CreateFCParams<CellType>(
+      input_quant_params,
gate_quantization_settings.activation_weight, + nonlinear_activation_input_scale); + gate_params.recurrent_fc_params = CreateFCParams( + hidden_state_quant_params, gate_quantization_settings.recurrent_weight, + nonlinear_activation_input_scale); + return gate_params; +} + +inline tflite::GateParameters CreateGateParamsFloat() { + tflite::GateParameters gate_params = {}; + gate_params.input_fc_params = CreateFCParamsFloat(); + gate_params.recurrent_fc_params = CreateFCParamsFloat(); + return gate_params; +} +// Create parameters for element wise multiplication that happens in a) cell +// state update ; b) hidden state update +// Note that all the output of gates are symmetrically quantized so only scales +// are required for input. However, during the hidden state update phase, the +// output is the updated hidden state, which is asymmetrically quantized. Thus +// output may require zero point +template +tflite::ArithmeticParams CreateInterGateMulParams(const float input1_scale, + const float input2_scale, + const float output_scale, + const int output_zp = 0) { + tflite::ArithmeticParams op_params = {}; + op_params.quantized_activation_min = std::numeric_limits::min(); + op_params.quantized_activation_max = std::numeric_limits::max(); + op_params.input1_offset = 0; + op_params.input2_offset = 0; + op_params.output_offset = output_zp; + + const double input_product_scale = + static_cast(input1_scale) * static_cast(input2_scale); + double effective_scale = + input_product_scale / static_cast(output_scale); + + QuantizeMultiplier(effective_scale, &op_params.output_multiplier, + &op_params.output_shift); + return op_params; +} + +inline tflite::ArithmeticParams CreateInterGateMulParamsFloat() { + tflite::ArithmeticParams op_params = {}; + CalculateActivationRange(kTfLiteActNone, &op_params.float_activation_min, + &op_params.float_activation_max); + return op_params; +} + +// Create the additional information about the cell state, which include: +// cell_state_scale_power: used in integer nonlinear function (e.g., tanh) +// quantized_cell_clip: quantized cell clip range +CellStateInfo CreateLstmCellStateInfo(const float cell_state_scale, + const float cell_clip) { + CellStateInfo cell_state_info; + // cell_state_scale_power: 2^-cell_state_scale_power = cell state scale + int buffer; + tflite::CheckedLog2(cell_state_scale, &buffer); + cell_state_info.cell_state_scale_power = buffer; + // Cell state specifics + cell_state_info.cell_clip = cell_clip; + cell_state_info.quantized_cell_clip = static_cast( + std::min(std::max(static_cast(cell_clip) / + static_cast(cell_state_scale), + -32768.0), + 32767.0)); + return cell_state_info; +} + +// Create LSTMKernelContents from LstmNodeContent by copying TfLiteEvalTensor +// pointers +template +LSTMKernelContents CreateLSTMKernelContent( + LstmNodeContent& + node_contents) { + LSTMKernelContents kernel_content; + // Point to correct tensors + kernel_content.internal_tensors[kLstmInputTensor] = + node_contents.GetEvalTensor(kLstmInputTensor); + kernel_content.internal_tensors[kLstmInputToInputWeightsTensor] = + node_contents.GetEvalTensor(kLstmInputToInputWeightsTensor); + kernel_content.internal_tensors[kLstmInputToForgetWeightsTensor] = + node_contents.GetEvalTensor(kLstmInputToForgetWeightsTensor); + kernel_content.internal_tensors[kLstmInputToCellWeightsTensor] = + node_contents.GetEvalTensor(kLstmInputToCellWeightsTensor); + kernel_content.internal_tensors[kLstmInputToOutputWeightsTensor] = + 
node_contents.GetEvalTensor(kLstmInputToOutputWeightsTensor); + kernel_content.internal_tensors[kLstmRecurrentToInputWeightsTensor] = + node_contents.GetEvalTensor(kLstmRecurrentToInputWeightsTensor); + kernel_content.internal_tensors[kLstmRecurrentToForgetWeightsTensor] = + node_contents.GetEvalTensor(kLstmRecurrentToForgetWeightsTensor); + kernel_content.internal_tensors[kLstmRecurrentToCellWeightsTensor] = + node_contents.GetEvalTensor(kLstmRecurrentToCellWeightsTensor); + kernel_content.internal_tensors[kLstmRecurrentToOutputWeightsTensor] = + node_contents.GetEvalTensor(kLstmRecurrentToOutputWeightsTensor); + kernel_content.internal_tensors[kLstmInputGateBiasTensor] = + node_contents.GetEvalTensor(kLstmInputGateBiasTensor); + kernel_content.internal_tensors[kLstmForgetGateBiasTensor] = + node_contents.GetEvalTensor(kLstmForgetGateBiasTensor); + kernel_content.internal_tensors[kLstmCellGateBiasTensor] = + node_contents.GetEvalTensor(kLstmCellGateBiasTensor); + kernel_content.internal_tensors[kLstmOutputGateBiasTensor] = + node_contents.GetEvalTensor(kLstmOutputGateBiasTensor); + kernel_content.internal_tensors[kLstmOutputStateTensor] = + node_contents.GetEvalTensor(kLstmOutputStateTensor); + kernel_content.internal_tensors[kLstmOutputGateBiasTensor] = + node_contents.GetEvalTensor(kLstmOutputGateBiasTensor); + kernel_content.internal_tensors[kLstmCellStateTensor] = + node_contents.GetEvalTensor(kLstmCellStateTensor); + // Not used internal tensors + kernel_content.internal_tensors[kLstmCellToInputWeightsTensor] = nullptr; + kernel_content.internal_tensors[kLstmCellToForgetWeightsTensor] = nullptr; + kernel_content.internal_tensors[kLstmCellToOutputWeightsTensor] = nullptr; + kernel_content.internal_tensors[kLstmProjectionWeightsTensor] = nullptr; + kernel_content.internal_tensors[kLstmProjectionBiasTensor] = nullptr; + kernel_content.internal_tensors[kLstmInputLayerNormCoefficientsTensor] = + nullptr; + kernel_content.internal_tensors[kLstmForgetLayerNormCoefficientsTensor] = + nullptr; + kernel_content.internal_tensors[kLstmInputLayerNormCoefficientsTensor] = + nullptr; + kernel_content.internal_tensors[kLstmCellLayerNormCoefficientsTensor] = + nullptr; + kernel_content.internal_tensors[kLstmOutputLayerNormCoefficientsTensor] = + nullptr; + // Output tensor + kernel_content.output_tensor = node_contents.OutputEvalTensor(); + return kernel_content; +} + +// Deduce the size information (Batch (B), Time Steps (T), Input dimension (I), +// State dimension (S)) that defines the LSTM using the input and hidden state +// tensor +LstmSizeInfo CreateLstmSizeInfo( + const bool time_major, const TfLiteIntArray* input_tensor_shape, + const TfLiteIntArray* hidden_state_tensor_shape) { + LstmSizeInfo size_info; + size_info.time_major = time_major; + size_info.batch_size = + time_major ? input_tensor_shape->data[1] : input_tensor_shape->data[0]; + size_info.time_steps = + time_major ? input_tensor_shape->data[0] : input_tensor_shape->data[1]; + size_info.input_dimension = input_tensor_shape->data[2]; + size_info.state_dimension = hidden_state_tensor_shape->data[1]; + return size_info; +} + +// Create the LstmOpData using the LstmNodeContent and +// NodeQuantizationParameters (defined in test_data/lstm_test_data) During the +// actual inference phase, OpDataLSTM is created using information from the +// flatbuffer file. 
The test divide the complete LSTM node information into +// LstmNodeContent and NodeQuantizationParameters for easy construction +// purposes +template +OpDataLSTM CreateLstmOpData( + LstmNodeContent& + node_contents) { + const auto& builtin_data = node_contents.BuiltinData(); + const auto& quantization_settings = node_contents.QuantizationSettings(); + OpDataLSTM op_data; + + op_data.cell_gate_nonlinear_type = builtin_data.activation; + op_data.size_info = + CreateLstmSizeInfo(builtin_data.time_major, + node_contents.GetEvalTensor(kLstmInputTensor)->dims, + node_contents.HiddenStateEvalTensor()->dims); + + op_data.cell_state_info = CreateLstmCellStateInfo( + quantization_settings.cell_state.scale, builtin_data.cell_clip); + + // Gate Parameters + op_data.forget_gate_parameters = CreateGateParams( + quantization_settings.input, quantization_settings.hidden_state, + quantization_settings.forget_gate, + quantization_settings.nonlinear_activation_input_scale); + op_data.input_gate_parameters = CreateGateParams( + quantization_settings.input, quantization_settings.hidden_state, + quantization_settings.input_gate, + quantization_settings.nonlinear_activation_input_scale); + op_data.cell_gate_parameters = CreateGateParams( + quantization_settings.input, quantization_settings.hidden_state, + quantization_settings.cell_gate, + quantization_settings.nonlinear_activation_input_scale); + op_data.output_gate_parameters = CreateGateParams( + quantization_settings.input, quantization_settings.hidden_state, + quantization_settings.output_gate, + quantization_settings.nonlinear_activation_input_scale); + // Inter gate multiplication parameters + op_data.inter_gate_parameters.forget_cell_mul_params = + CreateInterGateMulParams( + quantization_settings.nonlinear_activation_output_scale, + quantization_settings.cell_state.scale, + quantization_settings.cell_state.scale); + op_data.inter_gate_parameters.input_mul_params = + CreateInterGateMulParams( + quantization_settings.nonlinear_activation_output_scale, + quantization_settings.nonlinear_activation_output_scale, + quantization_settings.cell_state.scale); + op_data.inter_gate_parameters.output_mul_params = + CreateInterGateMulParams( + quantization_settings.nonlinear_activation_output_scale, + quantization_settings.nonlinear_activation_output_scale, + quantization_settings.hidden_state.scale, + quantization_settings.hidden_state.zero_point); + return op_data; +} + +template +OpDataLSTM CreateLstmOpDataFloat( + LstmNodeContent& node_contents) { + const auto& builtin_data = node_contents.BuiltinData(); + OpDataLSTM op_data; + + op_data.cell_gate_nonlinear_type = builtin_data.activation; + op_data.size_info = + CreateLstmSizeInfo(builtin_data.time_major, + node_contents.GetEvalTensor(kLstmInputTensor)->dims, + node_contents.HiddenStateEvalTensor()->dims); + op_data.cell_state_info.cell_clip = builtin_data.cell_clip; + op_data.cell_state_info.quantized_cell_clip = 0; // No quantization + op_data.cell_state_info.cell_state_scale_power = 0; // No quantization + + // Gate Parameters + op_data.forget_gate_parameters = CreateGateParamsFloat(); + op_data.input_gate_parameters = CreateGateParamsFloat(); + op_data.cell_gate_parameters = CreateGateParamsFloat(); + op_data.output_gate_parameters = CreateGateParamsFloat(); + // Inter gate multiplication parameters + op_data.inter_gate_parameters.forget_cell_mul_params = + CreateInterGateMulParamsFloat(); + op_data.inter_gate_parameters.input_mul_params = + CreateInterGateMulParamsFloat(); + 
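  // Aside (editorial, not from the upstream sources): in the float path these
  // ArithmeticParams only carry the activation range produced by
  // CalculateActivationRange(kTfLiteActNone, ...); no requantization
  // multiplier or zero point is involved. The remaining product configured
  // below, output gate x tanh(cell state), yields the updated hidden state.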
op_data.inter_gate_parameters.output_mul_params = + CreateInterGateMulParamsFloat(); + return op_data; +} + +/*Test Functions Below Here*/ +template +void ValidateResultGoldens(const T* golden, const T* output_data, + const int output_len, const float tolerance) { + for (int i = 0; i < output_len; ++i) { + TF_LITE_MICRO_EXPECT_NEAR(golden[i], output_data[i], tolerance); + } +} + +template +void TestCalculateLstmGateFloat(const TfLiteEvalTensor* input, + const TfLiteEvalTensor* input_weight, + const TfLiteEvalTensor* input_bias, + // Recurrent FC + const TfLiteEvalTensor* recurrent, + const TfLiteEvalTensor* recurrent_weight, + const TfLiteEvalTensor* recurrent_bias, + // Result comparison + TfLiteFusedActivation nonlinear_type, + const float* expected_vals, float tolerance) { + float gate_output[batch_size * state_dimension] = {}; + float fc_output_buffer[batch_size * state_dimension] = {}; + + tflite::GateParameters gate_params = CreateGateParamsFloat(); + + // Create step information: only one time step, no need to update + auto size_info = tflite::testing::CreateLstmSizeInfo( + /*time_major*/ false, input->dims, recurrent->dims); + // revise time_major = true to enable batch inference + size_info.time_major = true; + tflite::lstm_internal::LstmStepManager step_info(&size_info); + + tflite::lstm_internal::CalculateLstmGate( + step_info, gate_params, + // Input FC + input, input_weight, input_bias, + // Recurrent FC + recurrent, recurrent_weight, recurrent_bias, + // Output + gate_output, + // Scratch arrays + fc_output_buffer, nonlinear_type); + + ValidateResultGoldens(expected_vals, gate_output, + batch_size * state_dimension, tolerance); +} + +template +void TestCalculateLstmGateInteger( + const TfLiteEvalTensor* input, const TfLiteEvalTensor* input_weight, + const TfLiteEvalTensor* input_bias, + // Recurrent FC + const TfLiteEvalTensor* recurrent, const TfLiteEvalTensor* recurrent_weight, + const TfLiteEvalTensor* recurrent_bias, + // Quantization settings + const NodeQuantizationParameters& node_quantization_settings, + const GateQuantizationParameters& gate_quantization_settings, + // Result comparison + TfLiteFusedActivation nonlinear_type, const float* expected_vals, + float tolerance) { + CellType gate_output[batch_size * state_dimension] = {}; + CellType fc_output_buffer[batch_size * state_dimension] = {}; + + tflite::GateParameters gate_params = CreateGateParams( + node_quantization_settings.input, node_quantization_settings.hidden_state, + gate_quantization_settings, + node_quantization_settings.nonlinear_activation_input_scale); + + // Create step information: only one time step, no need to update + auto size_info = tflite::testing::CreateLstmSizeInfo( + /*time_major*/ false, input->dims, recurrent->dims); + // revise time_major = true to enable batch inference + size_info.time_major = true; + tflite::lstm_internal::LstmStepManager step_info(&size_info); + + // only int8 weight is supported now + tflite::lstm_internal::CalculateLstmGate( + step_info, gate_params, + // Input FC + input, input_weight, input_bias, + // Recurrent FC + recurrent, recurrent_weight, recurrent_bias, + // Output + gate_output, + // Scratch arrays + fc_output_buffer, nonlinear_type); + + float gate_output_float[batch_size * state_dimension] = {}; + Dequantize(gate_output, batch_size * state_dimension, + node_quantization_settings.nonlinear_activation_output_scale, 0, + gate_output_float); + + ValidateResultGoldens(expected_vals, gate_output_float, + batch_size * state_dimension, tolerance); +} + 
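// Aside (editorial sketch, not part of the upstream test header): the two gate
// tests above drive tflite::lstm_internal::CalculateLstmGate, which
// conceptually evaluates activation(W_input * x + b_input + W_recurrent * h)
// for one gate. The helper below is a hypothetical float reference for the
// pre-activation only (recurrent bias omitted; the sigmoid/tanh nonlinearity
// is applied by the kernel afterwards), assuming row-major
// [state_dimension x input_dimension] and [state_dimension x state_dimension]
// weight layouts.
inline void ReferenceGatePreactivationFloat(
    const float* input, const float* input_weight, const float* input_bias,
    const float* recurrent, const float* recurrent_weight,
    int input_dimension, int state_dimension, float* preactivation) {
  for (int s = 0; s < state_dimension; ++s) {
    // Start from the (optional) input-side bias for this state unit.
    float acc = (input_bias != nullptr) ? input_bias[s] : 0.0f;
    // Input fully-connected contribution.
    for (int i = 0; i < input_dimension; ++i) {
      acc += input_weight[s * input_dimension + i] * input[i];
    }
    // Recurrent fully-connected contribution from the previous hidden state.
    for (int r = 0; r < state_dimension; ++r) {
      acc += recurrent_weight[s * state_dimension + r] * recurrent[r];
    }
    preactivation[s] = acc;
  }
}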
+template +void TestUpdateLstmCellFloat( + const GateOutputCheckData& gate_output_data, + LstmNodeContent& node_content, + const float tolerance) { + float buffer[batch_size * state_dimension] = {}; + + auto forget_cell_mul_params = CreateInterGateMulParamsFloat(); + auto input_mul_params = CreateInterGateMulParamsFloat(); + + auto cell_state = node_content.CellStateEvalTensor(); + // Create step information: only one time step, no need to update + auto size_info = tflite::testing::CreateLstmSizeInfo( + /*time_major*/ false, + node_content.GetEvalTensor(tflite::kLstmInputTensor)->dims, + node_content.HiddenStateEvalTensor()->dims); + // revise time_major = true to enable batch inference + size_info.time_major = true; + tflite::lstm_internal::LstmStepManager step_info(&size_info); + + // copy the data since it will be updated + float forget_gate[batch_size * state_dimension] = {}; + std::memcpy(forget_gate, gate_output_data.expected_forget_gate_output, + batch_size * state_dimension * sizeof(float)); + + CellStateInfo cell_state_info; + cell_state_info.cell_clip = node_content.BuiltinData().cell_clip; + // Call the function to be tested + tflite::lstm_internal::UpdateLstmCell( + step_info, cell_state, forget_gate, + gate_output_data.expected_input_gate_output, + gate_output_data.expected_cell_gate_output, forget_cell_mul_params, + input_mul_params, cell_state_info, buffer); + + ValidateResultGoldens(gate_output_data.expected_updated_cell, + tflite::micro::GetTensorData(cell_state), + batch_size * state_dimension, tolerance); +} + +template +void TestUpdateLstmCellInteger( + const GateOutputCheckData& gate_output_data, + LstmNodeContent& node_content, + const float tolerance) { + const auto& quantization_settings = node_content.QuantizationSettings(); + CellType quantized_forget_gate[batch_size * state_dimension] = {}; + tflite::Quantize(gate_output_data.expected_forget_gate_output, + quantized_forget_gate, batch_size * state_dimension, + quantization_settings.nonlinear_activation_output_scale, 0); + + CellType quantized_input_gate[batch_size * state_dimension] = {}; + tflite::Quantize(gate_output_data.expected_input_gate_output, + quantized_input_gate, batch_size * state_dimension, + quantization_settings.nonlinear_activation_output_scale, 0); + + CellType quantized_cell_gate[batch_size * state_dimension] = {}; + tflite::Quantize(gate_output_data.expected_cell_gate_output, + quantized_cell_gate, batch_size * state_dimension, + quantization_settings.nonlinear_activation_output_scale, 0); + + CellType buffer[batch_size * state_dimension] = {}; + + auto forget_cell_mul_params = CreateInterGateMulParams( + quantization_settings.nonlinear_activation_output_scale, + quantization_settings.cell_state.scale, + quantization_settings.cell_state.scale); + auto input_mul_params = CreateInterGateMulParams( + quantization_settings.nonlinear_activation_output_scale, + quantization_settings.nonlinear_activation_output_scale, + quantization_settings.cell_state.scale); + + auto cell_state_info = + CreateLstmCellStateInfo(quantization_settings.cell_state.scale, + node_content.BuiltinData().cell_clip); + + auto cell_state = node_content.CellStateEvalTensor(); + // Create step information: only one time step, no need to update + auto size_info = tflite::testing::CreateLstmSizeInfo( + /*time_major*/ false, + node_content.GetEvalTensor(tflite::kLstmInputTensor)->dims, + node_content.HiddenStateEvalTensor()->dims); + // revise time_major = true to enable batch inference + size_info.time_major = true; + 
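  // Aside (editorial, not from the upstream sources): UpdateLstmCell,
  // exercised below, performs the element-wise update
  //   cell_state = forget_gate * cell_state + input_gate * cell_gate
  // and clips the result to +/- cell_clip. In this integer variant both
  // products are rescaled to the cell-state scale using the
  // CreateInterGateMulParams multipliers prepared above.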
tflite::lstm_internal::LstmStepManager step_info(&size_info); + + // Call the function to be tested + tflite::lstm_internal::UpdateLstmCell( + step_info, cell_state, quantized_forget_gate, quantized_input_gate, + quantized_cell_gate, forget_cell_mul_params, input_mul_params, + cell_state_info, buffer); + + float cell_state_float[batch_size * state_dimension] = {}; + Dequantize(tflite::micro::GetTensorData(cell_state), + batch_size * state_dimension, + quantization_settings.cell_state.scale, + quantization_settings.cell_state.zero_point, cell_state_float); + + ValidateResultGoldens(gate_output_data.expected_updated_cell, + cell_state_float, batch_size * state_dimension, + tolerance); +} + +template +void TestUpdateLstmHiddenFloat( + const GateOutputCheckData& gate_output_data, + LstmNodeContent& node_content, + const float tolerance) { + float buffer[batch_size * state_dimension] = {}; + + auto mul_params = CreateInterGateMulParamsFloat(); + + int32_t cell_state_scale_power = 0; + + // Create step information: only one time step, no need to update + auto size_info = tflite::testing::CreateLstmSizeInfo( + /*time_major*/ false, + node_content.GetEvalTensor(tflite::kLstmInputTensor)->dims, + node_content.HiddenStateEvalTensor()->dims); + // revise time_major = true to enable batch inference + size_info.time_major = true; + tflite::lstm_internal::LstmStepManager step_info(&size_info); + + auto cell_state = node_content.CellStateEvalTensor(); + auto hidden_state = node_content.HiddenStateEvalTensor(); + + tflite::lstm_internal::UpdateLstmHidden( + step_info, cell_state, hidden_state, + gate_output_data.expected_output_gate_output, mul_params, + cell_state_scale_power, buffer); + + ValidateResultGoldens(gate_output_data.expected_updated_hidden, + tflite::micro::GetTensorData(hidden_state), + batch_size * state_dimension, tolerance); +} + +template +void TestUpdateLstmHiddenInteger( + const GateOutputCheckData& gate_output_data, + LstmNodeContent& node_content, + const float tolerance) { + const auto& quantization_settings = node_content.QuantizationSettings(); + CellType quantized_output_gate[batch_size * state_dimension] = {}; + tflite::Quantize(gate_output_data.expected_output_gate_output, + quantized_output_gate, batch_size * state_dimension, + quantization_settings.nonlinear_activation_output_scale, 0); + + CellType buffer[batch_size * state_dimension] = {}; + + auto mul_params = CreateInterGateMulParams( + quantization_settings.nonlinear_activation_output_scale, + quantization_settings.nonlinear_activation_output_scale, + quantization_settings.hidden_state.scale, + quantization_settings.hidden_state.zero_point); + + int cell_state_scale_power_buffer; + tflite::CheckedLog2(quantization_settings.cell_state.scale, + &cell_state_scale_power_buffer); + int32_t cell_state_scale_power = cell_state_scale_power_buffer; + + // Create step information: only one time step, no need to update + auto size_info = tflite::testing::CreateLstmSizeInfo( + /*time_major*/ false, + node_content.GetEvalTensor(tflite::kLstmInputTensor)->dims, + node_content.HiddenStateEvalTensor()->dims); + // revise time_major = true to enable batch inference + size_info.time_major = true; + tflite::lstm_internal::LstmStepManager step_info(&size_info); + + auto cell_state = node_content.CellStateEvalTensor(); + auto hidden_state = node_content.HiddenStateEvalTensor(); + + tflite::lstm_internal::UpdateLstmHidden( + step_info, cell_state, hidden_state, quantized_output_gate, mul_params, + cell_state_scale_power, buffer); + + float 
hidden_state_float[batch_size * state_dimension] = {}; + Dequantize(tflite::micro::GetTensorData(hidden_state), + batch_size * state_dimension, + quantization_settings.hidden_state.scale, + quantization_settings.hidden_state.zero_point, hidden_state_float); + + ValidateResultGoldens(gate_output_data.expected_updated_hidden, + hidden_state_float, batch_size * state_dimension, + tolerance); +} + +template +void TestLstmStepFloat( + const GateOutputCheckData& gate_output_data, + const float hidden_state_tolerance, const float cell_state_tolerance, + /*can not be const, state will be updated*/ + LstmNodeContent& node_contents) { + // Mimicking the kernel preparation phase, node_contents approximate the + LSTMKernelContents kernel_content = CreateLSTMKernelContent(node_contents); + LSTMBuffers buffers; + // Scratch buffers on the stack + float buffer0[batch_size * state_dimension] = {}; + buffers.buffer0 = buffer0; + float buffer1[batch_size * state_dimension] = {}; + buffers.buffer1 = buffer1; + float buffer2[batch_size * state_dimension] = {}; + buffers.buffer2 = buffer2; + float buffer3[batch_size * state_dimension] = {}; + buffers.buffer3 = buffer3; + + OpDataLSTM op_data = CreateLstmOpDataFloat(node_contents); + // set time_major to true to test batch inference + op_data.size_info.time_major = true; + tflite::lstm_internal::LstmStepManager step_info(&op_data.size_info); + tflite::lstm_internal::LstmStep( + step_info, op_data, kernel_content, buffers); + + ValidateResultGoldens( + gate_output_data.expected_updated_hidden, + tflite::micro::GetTensorData(kernel_content.HiddenStateTensor()), + batch_size * state_dimension, hidden_state_tolerance); + ValidateResultGoldens( + gate_output_data.expected_updated_cell, + tflite::micro::GetTensorData(kernel_content.CellStateTensor()), + batch_size * state_dimension, cell_state_tolerance); +} + +template +void TestLstmStepInteger( + const GateOutputCheckData& gate_output_data, + const float hidden_state_tolerance, const float cell_state_tolerance, + /*can not be const, state will be updated*/ + LstmNodeContent& + node_contents) { + // Mimicking the kernel preparation phase, node_contents approximate the + LSTMKernelContents kernel_content = CreateLSTMKernelContent(node_contents); + LSTMBuffers buffers; + + // Scratch buffers on the stack + CellType buffer0[batch_size * state_dimension] = {}; + buffers.buffer0 = buffer0; + CellType buffer1[batch_size * state_dimension] = {}; + buffers.buffer1 = buffer1; + CellType buffer2[batch_size * state_dimension] = {}; + buffers.buffer2 = buffer2; + CellType buffer3[batch_size * state_dimension] = {}; + buffers.buffer3 = buffer3; + + OpDataLSTM op_data = CreateLstmOpData(node_contents); + // set time_major to true to test batch inference + op_data.size_info.time_major = true; + tflite::lstm_internal::LstmStepManager step_info(&op_data.size_info); + tflite::lstm_internal::LstmStep(step_info, op_data, kernel_content, + buffers); + + const auto& quantization_settings = node_contents.QuantizationSettings(); + float dequantized_hidden_state[batch_size * state_dimension] = {}; + Dequantize( + tflite::micro::GetTensorData( + kernel_content.HiddenStateTensor()), + batch_size * state_dimension, quantization_settings.hidden_state.scale, + quantization_settings.hidden_state.zero_point, dequantized_hidden_state); + + float dequantized_cell_state[batch_size * state_dimension] = {}; + Dequantize( + tflite::micro::GetTensorData(kernel_content.CellStateTensor()), + batch_size * state_dimension, 
quantization_settings.cell_state.scale, + quantization_settings.cell_state.zero_point, dequantized_cell_state); + + ValidateResultGoldens(gate_output_data.expected_updated_hidden, + dequantized_hidden_state, batch_size * state_dimension, + hidden_state_tolerance); + ValidateResultGoldens(gate_output_data.expected_updated_cell, + dequantized_cell_state, batch_size * state_dimension, + cell_state_tolerance); +} + +template +void TestEvalLstmFloat( + const LstmEvalCheckData< + batch_size * time_steps * input_dimension, batch_size * state_dimension, + batch_size * state_dimension * time_steps>& eval_check_data, + const float hidden_state_tolerance, const float cell_state_tolerance, + LstmNodeContent& node_contents) { + // Mimicking the kernel preparation phase, node_contents approximate the node + LSTMKernelContents kernel_content = CreateLSTMKernelContent(node_contents); + // Scratch buffers on the stack + LSTMBuffers buffers; + float buffer0[batch_size * state_dimension] = {}; + buffers.buffer0 = buffer0; + float buffer1[batch_size * state_dimension] = {}; + buffers.buffer1 = buffer1; + float buffer2[batch_size * state_dimension] = {}; + buffers.buffer2 = buffer2; + float buffer3[batch_size * state_dimension] = {}; + buffers.buffer3 = buffer3; + + OpDataLSTM op_data = CreateLstmOpDataFloat(node_contents); + + tflite::EvalLstm(op_data, kernel_content, + buffers); + + ValidateResultGoldens(eval_check_data.expected_hidden_state, + node_contents.GetHiddenStateData(), + batch_size * state_dimension, hidden_state_tolerance); + + ValidateResultGoldens(eval_check_data.expected_cell_state, + node_contents.GetCellStateData(), + batch_size * state_dimension, cell_state_tolerance); + + ValidateResultGoldens(eval_check_data.expected_output, + node_contents.GetOutputData(), + batch_size * state_dimension, hidden_state_tolerance); +} + +template +void TestEvalLstmInteger( + const LstmEvalCheckData< + batch_size * time_steps * input_dimension, batch_size * state_dimension, + batch_size * state_dimension * time_steps>& eval_check_data, + const float hidden_state_tolerance, const float cell_state_tolerance, + LstmNodeContent& + node_contents) { + // Mimicking the kernel preparation phase, node_contents approximate the node + LSTMKernelContents kernel_content = CreateLSTMKernelContent(node_contents); + // Scratch buffers on the stack + LSTMBuffers buffers; + CellType buffer0[batch_size * state_dimension] = {}; + buffers.buffer0 = buffer0; + CellType buffer1[batch_size * state_dimension] = {}; + buffers.buffer1 = buffer1; + CellType buffer2[batch_size * state_dimension] = {}; + buffers.buffer2 = buffer2; + CellType buffer3[batch_size * state_dimension] = {}; + buffers.buffer3 = buffer3; + + OpDataLSTM op_data = CreateLstmOpData(node_contents); + + tflite::EvalLstm( + op_data, kernel_content, buffers); + + const auto& quantization_settings = node_contents.QuantizationSettings(); + float dequantized_hidden_state[batch_size * state_dimension] = {}; + Dequantize(node_contents.GetHiddenStateData(), batch_size * state_dimension, + quantization_settings.hidden_state.scale, + quantization_settings.hidden_state.zero_point, + dequantized_hidden_state); + + ValidateResultGoldens(eval_check_data.expected_hidden_state, + dequantized_hidden_state, batch_size * state_dimension, + hidden_state_tolerance); + + float dequantized_cell_state[batch_size * state_dimension] = {}; + Dequantize(node_contents.GetCellStateData(), batch_size * state_dimension, + quantization_settings.cell_state.scale, + 
quantization_settings.cell_state.zero_point, + dequantized_cell_state); + ValidateResultGoldens(eval_check_data.expected_cell_state, + dequantized_cell_state, batch_size * state_dimension, + cell_state_tolerance); + + float dequantized_output[batch_size * state_dimension * time_steps] = {}; + Dequantize(node_contents.GetOutputData(), + batch_size * state_dimension * time_steps, + quantization_settings.output.scale, + quantization_settings.output.zero_point, dequantized_output); + ValidateResultGoldens(eval_check_data.expected_output, dequantized_output, + batch_size * state_dimension, hidden_state_tolerance); +} + +} // namespace testing +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_TEST_H_ diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_shared.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_shared.h index ee34b8489..dbdc3c553 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_shared.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_shared.h @@ -15,6 +15,9 @@ limitations under the License. #ifndef TENSORFLOW_LITE_MICRO_KERNELS_LSTM_SHARED_H_ #define TENSORFLOW_LITE_MICRO_KERNELS_LSTM_SHARED_H_ +#include "tensorflow/lite/c/builtin_op_data.h" +#include "tensorflow/lite/kernels/internal/types.h" + namespace tflite { // Input Tensors of size {n_batch, n_input} @@ -63,5 +66,85 @@ constexpr int kLstmOutputLayerNormCoefficientsTensor = 23; // Optional // Output tensors. constexpr int kLstmOutputTensor = 0; +// Parameters for the two fully conncted computation inside each gate +struct GateParameters { + FullyConnectedParams input_fc_params; + FullyConnectedParams recurrent_fc_params; +}; + +// Paramaters for the element wise multiplications between gate outputs +struct InterGateParameters { + ArithmeticParams forget_cell_mul_params; + ArithmeticParams input_mul_params; + ArithmeticParams output_mul_params; +}; + +// Size information about the LSTM kernel, which is deduced from tensors stored +// in the flat buffer file. +struct LstmSizeInfo { + bool time_major; + int batch_size; + int time_steps; + int input_dimension; + int state_dimension; +}; + +// Contains information about the cell state tensor +struct CellStateInfo { + float cell_clip; + // clipping range for cell state only 16 bits cell is supported (could be + // generalized through templatation) + int16_t quantized_cell_clip; + // 2^-cell_state_scale_power = cell state scale, required by integer tanh + // computation + int32_t cell_state_scale_power; +}; + +// Contains required computation information for LSTM kernel evaluation. +// Specifically, it includes shape and quantization settings for the LSTM +// internal operations. Formatted to support operations defined in the +// tensorflow/lite/kernels/internal/reference/integer_ops +// Should be constructed during the preparation phase +struct OpDataLSTM { + LstmSizeInfo size_info; + CellStateInfo cell_state_info; + TfLiteFusedActivation cell_gate_nonlinear_type; + GateParameters forget_gate_parameters; + GateParameters input_gate_parameters; + GateParameters cell_gate_parameters; + GateParameters output_gate_parameters; + InterGateParameters inter_gate_parameters; + int buffer_indices[4]; // TFLM only +}; + +// Provide an interface to access the internal tensors and buffers used for LSTM +// invocation. Constructed during the invocation phase +struct LSTMKernelContents { + public: + // Internal tensors, fixed (const). 
see lstm_shared.h for tensor names + const TfLiteEvalTensor* GetInternalTensor(const int tensor_index) const { + return internal_tensors[tensor_index]; + } + // Variable tensors (will be changed, can not be const) + TfLiteEvalTensor* HiddenStateTensor() { + return internal_tensors[kLstmOutputStateTensor]; + } + TfLiteEvalTensor* CellStateTensor() { + return internal_tensors[kLstmCellStateTensor]; + } + // Node internal tensors with indexes defined at the beginning of the file + TfLiteEvalTensor* internal_tensors[24]; + TfLiteEvalTensor* output_tensor; +}; + +template +struct LSTMBuffers { + // TFLM buffers requires buffer index from LstmOpData. + CellType* buffer0; + CellType* buffer1; + CellType* buffer2; + CellType* buffer3; +}; + } // namespace tflite #endif // TENSORFLOW_LITE_MICRO_KERNELS_LSTM_SHARED_H_ diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/maximum_minimum.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/maximum_minimum.cc index 1aebdefdc..434e4efa1 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/maximum_minimum.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/maximum_minimum.cc @@ -1,4 +1,4 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -26,9 +26,7 @@ limitations under the License. #include "tensorflow/lite/micro/micro_log.h" namespace tflite { -namespace ops { -namespace micro { -namespace maximum_minimum { + namespace { // This file has a reference implementation of TFMaximum/TFMinimum. @@ -65,8 +63,6 @@ struct MinimumOp { } }; -} // namespace - template void TFLiteOperation(TfLiteContext* context, TfLiteNode* node, const OpContext& op_context) { @@ -111,22 +107,16 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -} // namespace maximum_minimum +} // namespace -TfLiteRegistration Register_MAXIMUM() { - return tflite::micro::RegisterOp( - nullptr, nullptr, - maximum_minimum::Eval); +TfLiteRegistration_V1 Register_MAXIMUM() { + return tflite::micro::RegisterOp(nullptr, nullptr, + Eval); } -TfLiteRegistration Register_MINIMUM() { - return tflite::micro::RegisterOp( - nullptr, nullptr, - maximum_minimum::Eval); +TfLiteRegistration_V1 Register_MINIMUM() { + return tflite::micro::RegisterOp(nullptr, nullptr, + Eval); } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/micro_ops.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/micro_ops.h index df2a8d2c3..14b874d06 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/micro_ops.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/micro_ops.h @@ -1,4 +1,4 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -31,103 +31,107 @@ namespace tflite { // (https://abseil.io/tips/130). Any new ops (or cleanup of existing ops should // have their Register function declarations in the tflite namespace. 
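// Aside (editorial, not part of the patch): this revision renames the
// registration struct to TfLiteRegistration_V1 and moves most kernel
// registrations out of tflite::ops::micro into the tflite namespace, so a
// call site now reads, for example:
//   TfLiteRegistration_V1 reg = tflite::Register_MAXIMUM();
// Only RESHAPE and ROUND remain declared under tflite::ops::micro below.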
-TfLiteRegistration Register_ADD(); -TfLiteRegistration Register_ADD_N(); -TfLiteRegistration Register_ARG_MAX(); -TfLiteRegistration Register_ARG_MIN(); -TfLiteRegistration Register_ASSIGN_VARIABLE(); -TfLiteRegistration Register_AVERAGE_POOL_2D(); -TfLiteRegistration Register_BATCH_TO_SPACE_ND(); -TfLiteRegistration Register_BROADCAST_ARGS(); -TfLiteRegistration Register_BROADCAST_TO(); -TfLiteRegistration Register_CALL_ONCE(); -TfLiteRegistration Register_CAST(); +TfLiteRegistration_V1 Register_ABS(); +TfLiteRegistration_V1 Register_ADD(); +TfLiteRegistration_V1 Register_ADD_N(); +TfLiteRegistration_V1 Register_ARG_MAX(); +TfLiteRegistration_V1 Register_ARG_MIN(); +TfLiteRegistration_V1 Register_ASSIGN_VARIABLE(); +TfLiteRegistration_V1 Register_AVERAGE_POOL_2D(); +TfLiteRegistration_V1 Register_BATCH_TO_SPACE_ND(); +TfLiteRegistration_V1 Register_BROADCAST_ARGS(); +TfLiteRegistration_V1 Register_BROADCAST_TO(); +TfLiteRegistration_V1 Register_CALL_ONCE(); +TfLiteRegistration_V1 Register_CAST(); +TfLiteRegistration_V1 Register_CEIL(); // TODO(b/160234179): Change custom OPs to also return by value. -TfLiteRegistration* Register_CIRCULAR_BUFFER(); -TfLiteRegistration Register_CUMSUM(); -TfLiteRegistration Register_DEPTH_TO_SPACE(); -TfLiteRegistration Register_DEPTHWISE_CONV_2D(); -TfLiteRegistration Register_DEQUANTIZE(); -TfLiteRegistration Register_DIV(); -TfLiteRegistration Register_ELU(); -TfLiteRegistration Register_EXP(); -TfLiteRegistration Register_EXPAND_DIMS(); -TfLiteRegistration Register_FILL(); -TfLiteRegistration Register_FLOOR_DIV(); -TfLiteRegistration Register_FLOOR_MOD(); -TfLiteRegistration Register_GATHER(); -TfLiteRegistration Register_GATHER_ND(); -TfLiteRegistration Register_HARD_SWISH(); -TfLiteRegistration Register_IF(); -TfLiteRegistration Register_L2_POOL_2D(); -TfLiteRegistration Register_LEAKY_RELU(); -TfLiteRegistration Register_LOG_SOFTMAX(); -TfLiteRegistration Register_LOGICAL_AND(); -TfLiteRegistration Register_LOGICAL_OR(); -TfLiteRegistration Register_LOGISTIC(); -TfLiteRegistration Register_MAX_POOL_2D(); -TfLiteRegistration Register_MIRROR_PAD(); -TfLiteRegistration Register_NEG(); -TfLiteRegistration Register_PRELU(); -TfLiteRegistration Register_MUL(); -TfLiteRegistration Register_PAD(); -TfLiteRegistration Register_PADV2(); -TfLiteRegistration Register_QUANTIZE(); -TfLiteRegistration Register_READ_VARIABLE(); -TfLiteRegistration Register_RELU(); -TfLiteRegistration Register_RELU6(); -TfLiteRegistration Register_RESIZE_BILINEAR(); -TfLiteRegistration Register_SELECT_V2(); -TfLiteRegistration Register_SHAPE(); -TfLiteRegistration Register_SLICE(); -TfLiteRegistration Register_SPACE_TO_BATCH_ND(); -TfLiteRegistration Register_SPACE_TO_DEPTH(); -TfLiteRegistration Register_SQUARED_DIFFERENCE(); -TfLiteRegistration Register_SQUEEZE(); -TfLiteRegistration Register_SUB(); -TfLiteRegistration Register_SUM(); -TfLiteRegistration Register_SVDF(); -TfLiteRegistration Register_TRANSPOSE(); -TfLiteRegistration Register_TRANSPOSE_CONV(); +TfLiteRegistration_V1* Register_CIRCULAR_BUFFER(); +TfLiteRegistration_V1 Register_CONCATENATION(); +TfLiteRegistration_V1 Register_CONV_2D(); +TfLiteRegistration_V1 Register_COS(); +TfLiteRegistration_V1 Register_CUMSUM(); +TfLiteRegistration_V1 Register_DEPTH_TO_SPACE(); +TfLiteRegistration_V1 Register_DEPTHWISE_CONV_2D(); +TfLiteRegistration_V1 Register_DEQUANTIZE(); +TfLiteRegistration_V1 Register_DIV(); +TfLiteRegistration_V1 Register_ELU(); +TfLiteRegistration_V1 Register_EQUAL(); +TfLiteRegistration_V1* 
Register_ETHOSU(); +TfLiteRegistration_V1 Register_EXP(); +TfLiteRegistration_V1 Register_EXPAND_DIMS(); +TfLiteRegistration_V1 Register_FILL(); +TfLiteRegistration_V1 Register_FLOOR(); +TfLiteRegistration_V1 Register_FLOOR_DIV(); +TfLiteRegistration_V1 Register_FLOOR_MOD(); +TfLiteRegistration_V1 Register_FULLY_CONNECTED(); +TfLiteRegistration_V1 Register_GATHER(); +TfLiteRegistration_V1 Register_GATHER_ND(); +TfLiteRegistration_V1 Register_GREATER(); +TfLiteRegistration_V1 Register_GREATER_EQUAL(); +TfLiteRegistration_V1 Register_HARD_SWISH(); +TfLiteRegistration_V1 Register_IF(); +TfLiteRegistration_V1 Register_L2_NORMALIZATION(); +TfLiteRegistration_V1 Register_L2_POOL_2D(); +TfLiteRegistration_V1 Register_LEAKY_RELU(); +TfLiteRegistration_V1 Register_LESS(); +TfLiteRegistration_V1 Register_LESS_EQUAL(); +TfLiteRegistration_V1 Register_LOG(); +TfLiteRegistration_V1 Register_LOG_SOFTMAX(); +TfLiteRegistration_V1 Register_LOGICAL_AND(); +TfLiteRegistration_V1 Register_LOGICAL_NOT(); +TfLiteRegistration_V1 Register_LOGICAL_OR(); +TfLiteRegistration_V1 Register_LOGISTIC(); +TfLiteRegistration_V1 Register_MAX_POOL_2D(); +TfLiteRegistration_V1 Register_MAXIMUM(); +TfLiteRegistration_V1 Register_MEAN(); +TfLiteRegistration_V1 Register_MINIMUM(); +TfLiteRegistration_V1 Register_MIRROR_PAD(); +TfLiteRegistration_V1 Register_MUL(); +TfLiteRegistration_V1 Register_NEG(); +TfLiteRegistration_V1 Register_NOT_EQUAL(); +TfLiteRegistration_V1 Register_PACK(); +TfLiteRegistration_V1 Register_PAD(); +TfLiteRegistration_V1 Register_PADV2(); +TfLiteRegistration_V1 Register_PRELU(); +TfLiteRegistration_V1 Register_QUANTIZE(); +TfLiteRegistration_V1 Register_READ_VARIABLE(); +TfLiteRegistration_V1 Register_REDUCE_MAX(); +TfLiteRegistration_V1 Register_RELU(); +TfLiteRegistration_V1 Register_RELU6(); +TfLiteRegistration_V1 Register_RESIZE_BILINEAR(); +TfLiteRegistration_V1 Register_RESIZE_NEAREST_NEIGHBOR(); +TfLiteRegistration_V1 Register_RSQRT(); +TfLiteRegistration_V1 Register_SELECT_V2(); +TfLiteRegistration_V1 Register_SHAPE(); +TfLiteRegistration_V1 Register_SIN(); +TfLiteRegistration_V1 Register_SLICE(); +TfLiteRegistration_V1 Register_SOFTMAX(); +TfLiteRegistration_V1 Register_SPACE_TO_BATCH_ND(); +TfLiteRegistration_V1 Register_SPACE_TO_DEPTH(); +TfLiteRegistration_V1 Register_SPLIT(); +TfLiteRegistration_V1 Register_SPLIT_V(); +TfLiteRegistration_V1 Register_SQRT(); +TfLiteRegistration_V1 Register_SQUARE(); +TfLiteRegistration_V1 Register_SQUARED_DIFFERENCE(); +TfLiteRegistration_V1 Register_SQUEEZE(); +TfLiteRegistration_V1 Register_STRIDED_SLICE(); +TfLiteRegistration_V1 Register_SUB(); +TfLiteRegistration_V1 Register_SUM(); +TfLiteRegistration_V1 Register_SVDF(); +TfLiteRegistration_V1 Register_TANH(); +TfLiteRegistration_V1 Register_TRANSPOSE(); +TfLiteRegistration_V1 Register_TRANSPOSE_CONV(); // TODO(b/230666079): resolve conflict with xtensa implementation -TfLiteRegistration Register_UNIDIRECTIONAL_SEQUENCE_LSTM(); -TfLiteRegistration Register_VAR_HANDLE(); -TfLiteRegistration Register_WHILE(); -TfLiteRegistration Register_ZEROS_LIKE(); +TfLiteRegistration_V1 Register_UNIDIRECTIONAL_SEQUENCE_LSTM(); +TfLiteRegistration_V1 Register_UNPACK(); +TfLiteRegistration_V1 Register_VAR_HANDLE(); +TfLiteRegistration_V1 Register_WHILE(); +TfLiteRegistration_V1 Register_ZEROS_LIKE(); namespace ops { namespace micro { - -TfLiteRegistration Register_ABS(); -TfLiteRegistration Register_CEIL(); -TfLiteRegistration Register_CONCATENATION(); -TfLiteRegistration Register_COS(); -TfLiteRegistration 
Register_EQUAL(); -TfLiteRegistration Register_FLOOR(); -TfLiteRegistration Register_GREATER(); -TfLiteRegistration Register_GREATER_EQUAL(); -TfLiteRegistration Register_LESS(); -TfLiteRegistration Register_LESS_EQUAL(); -TfLiteRegistration Register_LOG(); -TfLiteRegistration Register_LOGICAL_NOT(); -TfLiteRegistration Register_MAXIMUM(); -TfLiteRegistration Register_MINIMUM(); -TfLiteRegistration Register_NOT_EQUAL(); -TfLiteRegistration Register_PACK(); -TfLiteRegistration Register_RESHAPE(); -TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR(); -TfLiteRegistration Register_ROUND(); -TfLiteRegistration Register_RSQRT(); -TfLiteRegistration Register_SIN(); -TfLiteRegistration Register_SPLIT(); -TfLiteRegistration Register_SPLIT_V(); -TfLiteRegistration Register_SQRT(); -TfLiteRegistration Register_SQUARE(); -TfLiteRegistration Register_STRIDED_SLICE(); -TfLiteRegistration Register_UNPACK(); -TfLiteRegistration Register_L2_NORMALIZATION(); -TfLiteRegistration Register_TANH(); - +TfLiteRegistration_V1 Register_RESHAPE(); +TfLiteRegistration_V1 Register_ROUND(); } // namespace micro } // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/mirror_pad.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/mirror_pad.cc index 90d3bd9e0..c6ee1da76 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/mirror_pad.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/mirror_pad.cc @@ -208,7 +208,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { } // namespace -TfLiteRegistration Register_MIRROR_PAD() { +TfLiteRegistration_V1 Register_MIRROR_PAD() { return tflite::micro::RegisterOp(Init, Prepare, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/mul.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/mul.cc index 6e3c5f74b..6bc0666e6 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/mul.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/mul.cc @@ -61,7 +61,7 @@ TfLiteStatus MulEval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -TfLiteRegistration Register_MUL() { +TfLiteRegistration_V1 Register_MUL() { return tflite::micro::RegisterOp(MulInit, MulPrepare, MulEval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/mul.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/mul.h index 91b93c6ec..3ceaa55a4 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/mul.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/mul.h @@ -61,13 +61,13 @@ void EvalMulFloatReference(TfLiteContext* context, TfLiteNode* node, TfLiteEvalTensor* output); // Generic must define registration function. -TfLiteRegistration Register_MUL(); +TfLiteRegistration_V1 Register_MUL(); #if defined(CMSIS_NN) -TfLiteRegistration Register_MUL_INT8(); +TfLiteRegistration_V1 Register_MUL_INT8(); #else // Fallback registration -inline TfLiteRegistration Register_MUL_INT8() { return Register_MUL(); } +inline TfLiteRegistration_V1 Register_MUL_INT8() { return Register_MUL(); } #endif } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/mul_common.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/mul_common.cc index dd5dfc40c..45e7c1e4c 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/mul_common.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/mul_common.cc @@ -1,4 +1,4 @@ -/* Copyright 2022 The TensorFlow Authors. 
All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -54,7 +54,7 @@ TfLiteStatus CalculateOpDataMul(TfLiteContext* context, TfLiteNode* node, TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type); - if (output->type == kTfLiteInt8) { + if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) { TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( context, params->activation, output, &data->output_activation_min, &data->output_activation_max)); @@ -68,6 +68,12 @@ TfLiteStatus CalculateOpDataMul(TfLiteContext* context, TfLiteNode* node, data->input1_zero_point = input1->params.zero_point; data->input2_zero_point = input2->params.zero_point; data->output_zero_point = output->params.zero_point; + + if (input1->type == kTfLiteInt16) { + TF_LITE_ENSURE_EQ(context, data->input1_zero_point, 0); + TF_LITE_ENSURE_EQ(context, data->input2_zero_point, 0); + TF_LITE_ENSURE_EQ(context, data->output_zero_point, 0); + } } else if (output->type == kTfLiteInt32) { CalculateActivationRange(params->activation, &data->output_activation_min, &data->output_activation_max); @@ -148,9 +154,9 @@ TfLiteStatus EvalMulQuantizedReference(TfLiteContext* context, TfLiteNode* node, tflite::micro::GetTensorData(output)); } } else if (input1->type == kTfLiteInt16) { - TF_LITE_ENSURE_EQ(context, op_params.input1_offset, 0.0); - TF_LITE_ENSURE_EQ(context, op_params.input2_offset, 0.0); - TF_LITE_ENSURE_EQ(context, op_params.output_offset, 0.0); + TF_LITE_ENSURE_EQ(context, op_params.input1_offset, 0); + TF_LITE_ENSURE_EQ(context, op_params.input2_offset, 0); + TF_LITE_ENSURE_EQ(context, op_params.output_offset, 0); if (need_broadcast) { reference_integer_ops::BroadcastMul4DSlow( diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/neg.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/neg.cc index db26f6c69..cde9979fd 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/neg.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/neg.cc @@ -50,7 +50,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace -TfLiteRegistration Register_NEG() { +TfLiteRegistration_V1 Register_NEG() { return tflite::micro::RegisterOp(nullptr, nullptr, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pack.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pack.cc index 5e322b87b..4c2a9724c 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pack.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pack.cc @@ -1,4 +1,4 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -20,9 +20,7 @@ limitations under the License. 
#include "tensorflow/lite/micro/micro_log.h" namespace tflite { -namespace ops { -namespace micro { -namespace pack { + namespace { constexpr int kOutputTensor = 0; @@ -106,12 +104,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } } // namespace -} // namespace pack -TfLiteRegistration Register_PACK() { - return tflite::micro::RegisterOp(nullptr, nullptr, pack::Eval); +TfLiteRegistration_V1 Register_PACK() { + return tflite::micro::RegisterOp(nullptr, nullptr, Eval); } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pad.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pad.cc index 579df1a6b..2fff1cddf 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pad.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pad.cc @@ -218,12 +218,12 @@ TfLiteStatus PadPrepare(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -TfLiteRegistration Register_PAD() { +TfLiteRegistration_V1 Register_PAD() { return tflite::micro::RegisterOp(Init, PadPrepare, Eval); } // Also register Pad as PadV2. -TfLiteRegistration Register_PADV2() { +TfLiteRegistration_V1 Register_PADV2() { return tflite::micro::RegisterOp(Init, PadPrepare, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.cc index d9b147ad8..87871a458 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.cc @@ -1,4 +1,4 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -43,7 +43,12 @@ TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) { AveragePoolingEvalFloat(context, node, params, data, input, output); break; case kTfLiteInt8: - AveragePoolingEvalQuantized(context, node, params, data, input, output); + AveragePoolingEvalQuantized(context, node, params, data, input, + output); + break; + case kTfLiteInt16: + AveragePoolingEvalQuantized(context, node, params, data, input, + output); break; default: MicroPrintf("Input type %s is not currently supported", @@ -71,7 +76,12 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) { MaxPoolingEvalFloat(context, node, params, data, input, output); break; case kTfLiteInt8: - MaxPoolingEvalQuantized(context, node, params, data, input, output); + MaxPoolingEvalQuantized(context, node, params, data, input, + output); + break; + case kTfLiteInt16: + MaxPoolingEvalQuantized(context, node, params, data, input, + output); break; default: MicroPrintf("Type %s not currently supported.", @@ -88,11 +98,11 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) { } // namespace -TfLiteRegistration Register_AVERAGE_POOL_2D() { +TfLiteRegistration_V1 Register_AVERAGE_POOL_2D() { return tflite::micro::RegisterOp(Init, PoolingPrepare, AverageEval); } -TfLiteRegistration Register_MAX_POOL_2D() { +TfLiteRegistration_V1 Register_MAX_POOL_2D() { return tflite::micro::RegisterOp(Init, PoolingPrepare, MaxEval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.h index 493250ee1..800b21be8 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.h @@ -1,4 +1,4 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -20,7 +20,14 @@ limitations under the License. 
#include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h" +#include "tensorflow/lite/kernels/internal/reference/pooling.h" +#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/kernels/padding.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" #include "tensorflow/lite/micro/kernels/micro_ops.h" +#include "tensorflow/lite/micro/micro_log.h" namespace tflite { @@ -50,33 +57,83 @@ void AveragePoolingEvalFloat(const TfLiteContext* context, const TfLiteEvalTensor* input, TfLiteEvalTensor* output); +template void AveragePoolingEvalQuantized(TfLiteContext* context, const TfLiteNode* node, const TfLitePoolParams* params, const OpDataPooling* data, const TfLiteEvalTensor* input, - TfLiteEvalTensor* output); + TfLiteEvalTensor* output) { + TFLITE_DCHECK(input->type == kTfLiteInt8 || input->type == kTfLiteInt16); + + PoolParams op_params; + op_params.stride_height = params->stride_height; + op_params.stride_width = params->stride_width; + op_params.filter_height = params->filter_height; + op_params.filter_width = params->filter_width; + op_params.padding_values.height = data->padding.height; + op_params.padding_values.width = data->padding.width; + op_params.quantized_activation_min = data->activation_min; + op_params.quantized_activation_max = data->activation_max; + + reference_integer_ops::AveragePool(op_params, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); +} void MaxPoolingEvalFloat(TfLiteContext* context, TfLiteNode* node, TfLitePoolParams* params, const OpDataPooling* data, const TfLiteEvalTensor* input, TfLiteEvalTensor* output); +template void MaxPoolingEvalQuantized(TfLiteContext* context, TfLiteNode* node, TfLitePoolParams* params, const OpDataPooling* data, const TfLiteEvalTensor* input, - TfLiteEvalTensor* output); + TfLiteEvalTensor* output) { + TFLITE_DCHECK(input->type == kTfLiteInt8 || input->type == kTfLiteInt16); + + tflite::PoolParams op_params; + op_params.stride_height = params->stride_height; + op_params.stride_width = params->stride_width; + op_params.filter_height = params->filter_height; + op_params.filter_width = params->filter_width; + op_params.padding_values.height = data->padding.height; + op_params.padding_values.width = data->padding.width; + op_params.quantized_activation_min = data->activation_min; + op_params.quantized_activation_max = data->activation_max; + + reference_integer_ops::MaxPool(op_params, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); +} -#if defined(CMSIS_NN) -TfLiteRegistration Register_AVERAGE_POOL_2D_INT8(); +#if defined(CMSIS_NN) || defined(XTENSA) +TfLiteRegistration_V1 Register_AVERAGE_POOL_2D_INT8(); -TfLiteRegistration Register_MAX_POOL_2D_INT8(); +TfLiteRegistration_V1 Register_MAX_POOL_2D_INT8(); + +TfLiteRegistration_V1 Register_AVERAGE_POOL_2D_INT16(); + +TfLiteRegistration_V1 Register_MAX_POOL_2D_INT16(); #else -inline TfLiteRegistration Register_AVERAGE_POOL_2D_INT8() { +inline TfLiteRegistration_V1 Register_AVERAGE_POOL_2D_INT8() { + return tflite::Register_AVERAGE_POOL_2D(); +} + +inline TfLiteRegistration_V1 Register_MAX_POOL_2D_INT8() { + return tflite::Register_MAX_POOL_2D(); +} + +inline TfLiteRegistration_V1 
Register_AVERAGE_POOL_2D_INT16() { return tflite::Register_AVERAGE_POOL_2D(); } -inline TfLiteRegistration Register_MAX_POOL_2D_INT8() { +inline TfLiteRegistration_V1 Register_MAX_POOL_2D_INT16() { return tflite::Register_MAX_POOL_2D(); } #endif diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling_common.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling_common.cc index ddc18f0bb..b39e9d846 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling_common.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling_common.cc @@ -1,4 +1,4 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -69,10 +69,14 @@ TfLiteStatus PoolingPrepare(TfLiteContext* context, TfLiteNode* node) { if (input->type == kTfLiteFloat32) { CalculateActivationRange(params->activation, &data->activation_min_f32, &data->activation_max_f32); - } else if (input->type == kTfLiteInt8) { + } else if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) { CalculateActivationRangeQuantized(context, params->activation, output, &data->activation_min, &data->activation_max); + } else { + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; } micro_context->DeallocateTempTfLiteTensor(input); @@ -102,30 +106,6 @@ void AveragePoolingEvalFloat(const TfLiteContext* context, tflite::micro::GetTensorData(output)); } -void AveragePoolingEvalQuantized(TfLiteContext* context, const TfLiteNode* node, - const TfLitePoolParams* params, - const OpDataPooling* data, - const TfLiteEvalTensor* input, - TfLiteEvalTensor* output) { - TFLITE_DCHECK(input->type == kTfLiteInt8); - - PoolParams op_params; - op_params.stride_height = params->stride_height; - op_params.stride_width = params->stride_width; - op_params.filter_height = params->filter_height; - op_params.filter_width = params->filter_width; - op_params.padding_values.height = data->padding.height; - op_params.padding_values.width = data->padding.width; - op_params.quantized_activation_min = data->activation_min; - op_params.quantized_activation_max = data->activation_max; - - reference_integer_ops::AveragePool( - op_params, tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); -} - void MaxPoolingEvalFloat(TfLiteContext* context, TfLiteNode* node, TfLitePoolParams* params, const OpDataPooling* data, const TfLiteEvalTensor* input, @@ -145,26 +125,4 @@ void MaxPoolingEvalFloat(TfLiteContext* context, TfLiteNode* node, tflite::micro::GetTensorData(output)); } -void MaxPoolingEvalQuantized(TfLiteContext* context, TfLiteNode* node, - TfLitePoolParams* params, - const OpDataPooling* data, - const TfLiteEvalTensor* input, - TfLiteEvalTensor* output) { - tflite::PoolParams op_params; - op_params.stride_height = params->stride_height; - op_params.stride_width = params->stride_width; - op_params.filter_height = params->filter_height; - op_params.filter_width = params->filter_width; - op_params.padding_values.height = data->padding.height; - op_params.padding_values.width = data->padding.width; - op_params.quantized_activation_min = data->activation_min; - op_params.quantized_activation_max = data->activation_max; - - reference_integer_ops::MaxPool(op_params, 
- tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); -} - } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/prelu.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/prelu.cc index f4294723f..62e8eb9dd 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/prelu.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/prelu.cc @@ -68,7 +68,7 @@ TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) { } } -TfLiteRegistration Register_PRELU() { +TfLiteRegistration_V1 Register_PRELU() { return tflite::micro::RegisterOp(PreluInit, PreluPrepare, PreluEval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/quantize.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/quantize.cc index b5eb9c3c9..0e3336d9f 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/quantize.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/quantize.cc @@ -33,7 +33,7 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) { } // namespace -TfLiteRegistration Register_QUANTIZE() { +TfLiteRegistration_V1 Register_QUANTIZE() { return tflite::micro::RegisterOp(Init, PrepareQuantizeReference, EvalQuantizeReference); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/read_variable.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/read_variable.cc index 600a1bdd5..d173bc5f5 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/read_variable.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/read_variable.cc @@ -80,7 +80,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace. 
-TfLiteRegistration Register_READ_VARIABLE() { +TfLiteRegistration_V1 Register_READ_VARIABLE() { return tflite::micro::RegisterOp(nullptr, Prepare, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/reduce.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/reduce.cc index b4734f932..810d96f66 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/reduce.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/reduce.cc @@ -57,15 +57,15 @@ TfLiteStatus EvalSum(TfLiteContext* context, TfLiteNode* node) { static_cast(node->user_data)); } -TfLiteRegistration Register_MEAN() { +TfLiteRegistration_V1 Register_MEAN() { return tflite::micro::RegisterOp(InitReduce, PrepareMeanOrSum, EvalMean); } -TfLiteRegistration Register_REDUCE_MAX() { +TfLiteRegistration_V1 Register_REDUCE_MAX() { return tflite::micro::RegisterOp(InitReduce, PrepareMax, EvalMax); } -TfLiteRegistration Register_SUM() { +TfLiteRegistration_V1 Register_SUM() { return tflite::micro::RegisterOp(InitReduce, PrepareMeanOrSum, EvalSum); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/reduce.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/reduce.h index 5956974ef..3b70665d4 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/reduce.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/reduce.h @@ -56,9 +56,9 @@ TfLiteStatus EvalSumHelper(TfLiteContext* context, TfLiteNode* node, void ReduceResolveAxis(const int* axis_data, int axis_count, MeanParams* op_params); -TfLiteRegistration Register_MEAN(); -TfLiteRegistration Register_REDUCE_MAX(); -TfLiteRegistration Register_SUM(); +TfLiteRegistration_V1 Register_MEAN(); +TfLiteRegistration_V1 Register_REDUCE_MAX(); +TfLiteRegistration_V1 Register_SUM(); } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/reshape.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/reshape.cc index 832ba2612..7c8549a32 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/reshape.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/reshape.cc @@ -1,4 +1,4 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -68,6 +68,11 @@ TfLiteStatus ReshapeOutput(TfLiteContext* context, TfLiteNode* node) { } } if (stretch_dim != -1) { + TfLiteEvalTensor* output_eval = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + TF_LITE_ENSURE_STATUS(tflite::micro::CreateWritableTensorDimsWithCopy( + context, output, output_eval)); + output_shape = output->dims; // output tensor dims were moved output_shape->data[stretch_dim] = num_input_elements / num_output_elements; num_output_elements *= output_shape->data[stretch_dim]; } @@ -109,7 +114,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace reshape -TfLiteRegistration Register_RESHAPE() { +TfLiteRegistration_V1 Register_RESHAPE() { return tflite::micro::RegisterOp(nullptr, reshape::Prepare, reshape::Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/resize_bilinear.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/resize_bilinear.cc index 56432e1b5..48f3b9d60 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/resize_bilinear.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/resize_bilinear.cc @@ -109,7 +109,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace -TfLiteRegistration Register_RESIZE_BILINEAR() { +TfLiteRegistration_V1 Register_RESIZE_BILINEAR() { return tflite::micro::RegisterOp(nullptr, Prepare, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/resize_nearest_neighbor.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/resize_nearest_neighbor.cc index 756cf03fa..c6c8f6ff6 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/resize_nearest_neighbor.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/resize_nearest_neighbor.cc @@ -1,4 +1,4 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -24,9 +24,8 @@ limitations under the License. 
#include "tensorflow/lite/micro/micro_log.h" namespace tflite { -namespace ops { -namespace micro { -namespace resize_nearest_neighbor { + +namespace { constexpr int kInputTensor = 0; constexpr int kSizeTensor = 1; @@ -114,13 +113,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -} // namespace resize_nearest_neighbor -TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR() { - return tflite::micro::RegisterOp(nullptr, resize_nearest_neighbor::Prepare, - resize_nearest_neighbor::Eval); +} // namespace + +TfLiteRegistration_V1 Register_RESIZE_NEAREST_NEIGHBOR() { + return tflite::micro::RegisterOp(nullptr, Prepare, Eval); } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/round.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/round.cc index 0bda8783a..8db5fa2e1 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/round.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/round.cc @@ -67,7 +67,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } } // namespace round -TfLiteRegistration Register_ROUND() { +TfLiteRegistration_V1 Register_ROUND() { return tflite::micro::RegisterOp(nullptr, round::Prepare, round::Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/select.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/select.cc index 1b05bd2fb..d467c07f3 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/select.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/select.cc @@ -189,7 +189,7 @@ TfLiteStatus SelectEval(TfLiteContext* context, TfLiteNode* node) { // // 1. Either the same shape (in which case the select is elementwise), or // 2. Broadcastable shapes between 'condition', 'x' and 'y'. 
-TfLiteRegistration Register_SELECT_V2() { +TfLiteRegistration_V1 Register_SELECT_V2() { return tflite::micro::RegisterOp(tflite::SelectInit, tflite::SelectPrepare, tflite::SelectEval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/shape.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/shape.cc index e85bb81f7..3ced32099 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/shape.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/shape.cc @@ -60,7 +60,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace -TfLiteRegistration Register_SHAPE() { +TfLiteRegistration_V1 Register_SHAPE() { return tflite::micro::RegisterOp(nullptr, Prepare, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/slice.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/slice.cc index cc3cd5b42..90e977a02 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/slice.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/slice.cc @@ -140,6 +140,13 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output)); break; + case kTfLiteBool: + reference_ops::Slice(op_params, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; default: MicroPrintf("Input tensor type %s (%d) not supported.", TfLiteTypeGetName(input->type), input->type); @@ -150,7 +157,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace -TfLiteRegistration Register_SLICE() { +TfLiteRegistration_V1 Register_SLICE() { return tflite::micro::RegisterOp(nullptr, Prepare, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/softmax.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/softmax.cc index 67a1b4065..33a20f386 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/softmax.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/softmax.cc @@ -83,7 +83,7 @@ TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) { } } // namespace -TfLiteRegistration Register_SOFTMAX() { +TfLiteRegistration_V1 Register_SOFTMAX() { return tflite::micro::RegisterOp(SoftmaxInit, SoftmaxPrepare, SoftmaxEval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/softmax.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/softmax.h index 7096d2020..9e30bb561 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/softmax.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/softmax.h @@ -32,34 +32,36 @@ TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context, TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node); -// This is the most generic TfLiteRegistration. The actual supported types may -// still be target dependent. The only requirement is that every implementation -// (reference or optimized) must define this function. -TfLiteRegistration Register_SOFTMAX(); +// This is the most generic TfLiteRegistration_V1. The actual supported types +// may still be target dependent. The only requirement is that every +// implementation (reference or optimized) must define this function. 
+TfLiteRegistration_V1 Register_SOFTMAX(); #if defined(XTENSA) || defined(CMSIS_NN) -// Returns a TfLiteRegistration struct for kernel variant that only supports +// Returns a TfLiteRegistration_V1 struct for kernel variant that only supports // int8 input and int16 output. -TfLiteRegistration Register_SOFTMAX_INT8_INT16(); +TfLiteRegistration_V1 Register_SOFTMAX_INT8_INT16(); #else -inline TfLiteRegistration Register_SOFTMAX_INT8_INT16() { +inline TfLiteRegistration_V1 Register_SOFTMAX_INT8_INT16() { return Register_SOFTMAX(); } #endif #if defined(CMSIS_NN) -// Returns a TfLiteRegistration struct for kernel variant that only supports +// Returns a TfLiteRegistration_V1 struct for kernel variant that only supports // int8 input/output and uses the latency optimized implementations. -TfLiteRegistration Register_SOFTMAX_INT8(); +TfLiteRegistration_V1 Register_SOFTMAX_INT8(); -// Returns a TfLiteRegistration struct for kernel variant that only supports +// Returns a TfLiteRegistration_V1 struct for kernel variant that only supports // int16 input/output and uses the latency optimized implementations. -TfLiteRegistration Register_SOFTMAX_INT16(); +TfLiteRegistration_V1 Register_SOFTMAX_INT16(); #else -inline TfLiteRegistration Register_SOFTMAX_INT8() { return Register_SOFTMAX(); } +inline TfLiteRegistration_V1 Register_SOFTMAX_INT8() { + return Register_SOFTMAX(); +} -inline TfLiteRegistration Register_SOFTMAX_INT16() { +inline TfLiteRegistration_V1 Register_SOFTMAX_INT16() { return Register_SOFTMAX(); } #endif diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/space_to_batch_nd.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/space_to_batch_nd.cc index 11b32c3f6..a4dab2af7 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/space_to_batch_nd.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/space_to_batch_nd.cc @@ -114,7 +114,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace. -TfLiteRegistration Register_SPACE_TO_BATCH_ND() { +TfLiteRegistration_V1 Register_SPACE_TO_BATCH_ND() { return tflite::micro::RegisterOp(Init, Prepare, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/space_to_depth.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/space_to_depth.cc index 3640e2cdb..99837ee01 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/space_to_depth.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/space_to_depth.cc @@ -120,7 +120,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace -TfLiteRegistration Register_SPACE_TO_DEPTH() { +TfLiteRegistration_V1 Register_SPACE_TO_DEPTH() { return tflite::micro::RegisterOp(nullptr, Prepare, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/split.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/split.cc index 4ff748562..97d9a2d11 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/split.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/split.cc @@ -1,4 +1,4 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -21,9 +21,8 @@ limitations under the License. 
#include "tensorflow/lite/micro/micro_log.h" namespace tflite { -namespace ops { -namespace micro { -namespace split { + +namespace { template TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node, @@ -117,12 +116,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -} // namespace split +} // namespace -TfLiteRegistration Register_SPLIT() { - return tflite::micro::RegisterOp(nullptr, split::Prepare, split::Eval); +TfLiteRegistration_V1 Register_SPLIT() { + return tflite::micro::RegisterOp(nullptr, Prepare, Eval); } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/split_v.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/split_v.cc index d0002d57c..e79e5e8b6 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/split_v.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/split_v.cc @@ -1,4 +1,4 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -22,9 +22,8 @@ limitations under the License. #include "tensorflow/lite/micro/micro_log.h" namespace tflite { -namespace ops { -namespace micro { -namespace split_v { + +namespace { template TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node, @@ -119,12 +118,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -} // namespace split_v +} // namespace -TfLiteRegistration Register_SPLIT_V() { - return tflite::micro::RegisterOp(nullptr, split_v::Prepare, split_v::Eval); +TfLiteRegistration_V1 Register_SPLIT_V() { + return tflite::micro::RegisterOp(nullptr, Prepare, Eval); } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/squared_difference.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/squared_difference.cc index 8786a8715..b0cd389cb 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/squared_difference.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/squared_difference.cc @@ -44,6 +44,44 @@ void* SquaredDifferenceInit(TfLiteContext* context, const char* buffer, return context->AllocatePersistentBuffer(context, sizeof(OpData)); } +void PrepareQuantized( + const TfLiteQuantizationParams& input1_quantization_params, + const TfLiteQuantizationParams& input2_quantization_params, + const TfLiteQuantizationParams& output_quantization_params, + const int left_shift, const int32_t quantized_activation_min, + const int32_t quantized_activation_max, OpData* data) { + data->arithmetic_params.input1_offset = + -input1_quantization_params.zero_point; + data->arithmetic_params.input2_offset = + -input2_quantization_params.zero_point; + data->arithmetic_params.output_offset = output_quantization_params.zero_point; + data->arithmetic_params.left_shift = left_shift; + const double twice_max_input_scale = + 2.0 * static_cast(std::max(input1_quantization_params.scale, + input2_quantization_params.scale)); + const double real_input1_multiplier = + static_cast(input1_quantization_params.scale) / + twice_max_input_scale; + double real_input2_multiplier = + static_cast(input2_quantization_params.scale) / + twice_max_input_scale; + const double real_output_multiplier = + (twice_max_input_scale * twice_max_input_scale) / + static_cast((1 
<< data->arithmetic_params.left_shift * 2) * + output_quantization_params.scale); + QuantizeMultiplierSmallerThanOneExp( + real_input1_multiplier, &data->arithmetic_params.input1_multiplier, + &data->arithmetic_params.input1_shift); + QuantizeMultiplierSmallerThanOneExp( + real_input2_multiplier, &data->arithmetic_params.input2_multiplier, + &data->arithmetic_params.input2_shift); + QuantizeMultiplier(real_output_multiplier, + &data->arithmetic_params.output_multiplier, + &data->arithmetic_params.output_shift); + data->arithmetic_params.quantized_activation_min = quantized_activation_min; + data->arithmetic_params.quantized_activation_max = quantized_activation_max; +} + TfLiteStatus SquaredDifferencePrepare(TfLiteContext* context, TfLiteNode* node) { TFLITE_DCHECK(node->user_data != nullptr); @@ -68,11 +106,10 @@ TfLiteStatus SquaredDifferencePrepare(TfLiteContext* context, TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type); output->type = input2->type; - // Ensure the quantization parameters are equivalent. + const TfLiteQuantizationParams& input1_quantization_params = input1->params; + const TfLiteQuantizationParams& input2_quantization_params = input2->params; + const TfLiteQuantizationParams& output_quantization_params = output->params; if (input1->type == kTfLiteInt8) { - const auto& input1_quantization_params = input1->params; - const auto& input2_quantization_params = input2->params; - const auto& output_quantization_params = output->params; const int32_t integer_type_min = std::numeric_limits::min(); const int32_t integer_type_max = std::numeric_limits::max(); TF_LITE_ENSURE(context, @@ -87,43 +124,25 @@ TfLiteStatus SquaredDifferencePrepare(TfLiteContext* context, output_quantization_params.zero_point >= integer_type_min); TF_LITE_ENSURE(context, output_quantization_params.zero_point <= integer_type_max); - data->arithmetic_params.input1_offset = - -input1_quantization_params.zero_point; - data->arithmetic_params.input2_offset = - -input2_quantization_params.zero_point; - data->arithmetic_params.output_offset = - output_quantization_params.zero_point; - - // shift to make integer for scales. 
- // 7 is selected so that maximum shifted result 255^2 * (1 << (7 * 2 )) - // does not overflow signed 32-bit integer - data->arithmetic_params.left_shift = 7; - const double twice_max_input_scale = - 2.0 * static_cast(std::max(input1_quantization_params.scale, - input2_quantization_params.scale)); - const double real_input1_multiplier = - static_cast(input1_quantization_params.scale) / - twice_max_input_scale; - double real_input2_multiplier = - static_cast(input2_quantization_params.scale) / - twice_max_input_scale; - const double real_output_multiplier = - (twice_max_input_scale * twice_max_input_scale) / - static_cast((1 << data->arithmetic_params.left_shift * 2) * - output_quantization_params.scale); - QuantizeMultiplierSmallerThanOneExp( - real_input1_multiplier, &data->arithmetic_params.input1_multiplier, - &data->arithmetic_params.input1_shift); - QuantizeMultiplierSmallerThanOneExp( - real_input2_multiplier, &data->arithmetic_params.input2_multiplier, - &data->arithmetic_params.input2_shift); - QuantizeMultiplierSmallerThanOneExp( - real_output_multiplier, &data->arithmetic_params.output_multiplier, - &data->arithmetic_params.output_shift); - data->arithmetic_params.quantized_activation_min = - std::numeric_limits::min(); - data->arithmetic_params.quantized_activation_max = - std::numeric_limits::max(); + // leftshift = 7 is selected so that maximum shifted result 255^2 * (1 << (7 + // * 2 )) does not overflow signed 32-bit integer + PrepareQuantized(input1_quantization_params, input2_quantization_params, + output_quantization_params, /*left_shift=*/7, + /*quantized_activation_min*/ integer_type_min, + /*quantized_activation_max*/ integer_type_max, data); + } else if (input1->type == kTfLiteInt16) { + const int32_t integer_type_min = std::numeric_limits::min(); + const int32_t integer_type_max = std::numeric_limits::max(); + TF_LITE_ENSURE(context, input1_quantization_params.zero_point == 0); + TF_LITE_ENSURE(context, input2_quantization_params.zero_point == 0); + TF_LITE_ENSURE(context, output_quantization_params.zero_point == 0); + + // leftshift = 0 as number is already 16-bit. so that maximum shifted result + // 32767^2 * (1 << (0 * 2 )) + PrepareQuantized(input1_quantization_params, input2_quantization_params, + output_quantization_params, /*left_shift=*/0, + /*quantized_activation_min*/ integer_type_min, + /*quantized_activation_max*/ integer_type_max, data); } data->requires_broadcast = !HaveSameShapes(input1, input2); @@ -134,8 +153,8 @@ TfLiteStatus SquaredDifferencePrepare(TfLiteContext* context, return kTfLiteOk; } -inline int8_t SquaredDifference(int8_t x, int8_t y, - const ArithmeticParams& params) { +template +T SquaredDifference(T x, T y, const ArithmeticParams& params) { const int32_t input1_val = params.input1_offset + x; const int32_t input2_val = params.input2_offset + y; const int32_t shifted_input1_val = input1_val * (1 << params.left_shift); @@ -148,16 +167,16 @@ inline int8_t SquaredDifference(int8_t x, int8_t y, shifted_input2_val, params.input2_multiplier, params.input2_shift); const int32_t raw_diff = scaled_input1_val - scaled_input2_val; - // Max of this is 255^2 * (1 << 14), so won't overflow 32 bits. + // Max of this is 32767^2 * (1 << 0), so won't overflow 32 bits. 
const int32_t squared_raw_diff = raw_diff * raw_diff; const int32_t raw_output = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - squared_raw_diff, params.output_multiplier, params.output_shift) + + MultiplyByQuantizedMultiplier(squared_raw_diff, params.output_multiplier, + params.output_shift) + params.output_offset; const int32_t clamped_output = std::min(params.quantized_activation_max, std::max(params.quantized_activation_min, raw_output)); - return static_cast(clamped_output); + return static_cast(clamped_output); } template @@ -180,9 +199,9 @@ void EvalQuantizedSquaredDifference(TfLiteContext* context, TfLiteNode* node, const int flat_size = tflite::micro::GetTensorShape(input1).FlatSize(); reference_integer_ops::ElementWise( flat_size, op_data->arithmetic_params, - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorData(output), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorData(output), reference_integer_ops::CheckArithmeticParams, SquaredDifference); } } @@ -228,9 +247,13 @@ TfLiteStatus SquaredDifferenceEval(TfLiteContext* context, TfLiteNode* node) { } else if (output->type == kTfLiteInt8) { EvalQuantizedSquaredDifference(context, node, data, input1, input2, output); + } else if (output->type == kTfLiteInt16) { + EvalQuantizedSquaredDifference(context, node, data, input1, input2, + output); } else { MicroPrintf( - "SquaredDifference only supports FLOAT32, INT32 and INT8 now, got %d.", + "SquaredDifference only supports FLOAT32, INT32 , INT16 and INT8 now, " + "got %d.", output->type); return kTfLiteError; } @@ -239,7 +262,7 @@ TfLiteStatus SquaredDifferenceEval(TfLiteContext* context, TfLiteNode* node) { } } // namespace -TfLiteRegistration Register_SQUARED_DIFFERENCE() { +TfLiteRegistration_V1 Register_SQUARED_DIFFERENCE() { return tflite::micro::RegisterOp( SquaredDifferenceInit, SquaredDifferencePrepare, SquaredDifferenceEval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/squeeze.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/squeeze.cc index 017538498..3ebf448de 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/squeeze.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/squeeze.cc @@ -111,7 +111,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace -TfLiteRegistration Register_SQUEEZE() { +TfLiteRegistration_V1 Register_SQUEEZE() { return tflite::micro::RegisterOp(nullptr, Prepare, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/strided_slice.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/strided_slice.cc index 9985cf913..e31f32c6d 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/strided_slice.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/strided_slice.cc @@ -1,4 +1,4 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -26,9 +26,8 @@ limitations under the License. 
#include "tensorflow/lite/micro/micro_log.h" namespace tflite { -namespace ops { -namespace micro { -namespace strided_slice { + +namespace { constexpr int kInputTensor = 0; constexpr int kBeginTensor = 1; @@ -198,13 +197,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } return kTfLiteOk; } -} // namespace strided_slice -TfLiteRegistration Register_STRIDED_SLICE() { - return tflite::micro::RegisterOp(strided_slice::Init, strided_slice::Prepare, - strided_slice::Eval); +} // namespace + +TfLiteRegistration_V1 Register_STRIDED_SLICE() { + return tflite::micro::RegisterOp(Init, Prepare, Eval); } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/sub.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/sub.cc index a54c488fd..38df0bb20 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/sub.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/sub.cc @@ -161,7 +161,7 @@ TfLiteStatus SubEval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -TfLiteRegistration Register_SUB() { +TfLiteRegistration_V1 Register_SUB() { return tflite::micro::RegisterOp(SubInit, SubPrepare, SubEval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/sub_common.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/sub_common.cc index 7ad3aa6aa..d6647462f 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/sub_common.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/sub_common.cc @@ -52,14 +52,12 @@ TfLiteStatus CalculateOpDataSub(TfLiteContext* context, TfLiteSubParams* params, const float twice_max_input_scale = 2 * std::max(input1->params.scale, input2->params.scale); const double real_input1_multiplier = - static_cast(input1->params.scale) / - static_cast(twice_max_input_scale); + static_cast(input1->params.scale / twice_max_input_scale); const double real_input2_multiplier = - static_cast(input2->params.scale) / - static_cast(twice_max_input_scale); + static_cast(input2->params.scale / twice_max_input_scale); const double real_output_multiplier = - static_cast(twice_max_input_scale) / - ((1 << data->left_shift) * static_cast(output->params.scale)); + static_cast(twice_max_input_scale / + ((1 << data->left_shift) * output->params.scale)); QuantizeMultiplierSmallerThanOneExp( real_input1_multiplier, &data->input1_multiplier, &data->input1_shift); diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/svdf.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/svdf.cc index f6aa4e8b9..9524b9eb8 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/svdf.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/svdf.cc @@ -99,7 +99,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace -TfLiteRegistration Register_SVDF() { +TfLiteRegistration_V1 Register_SVDF() { return tflite::micro::RegisterOp(Init, PrepareSvdf, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/svdf.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/svdf.h index 0915c9fdc..8ee912d7b 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/svdf.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/svdf.h @@ -77,13 +77,13 @@ void EvalFloatSvdfReference( TfLiteStatus PrepareSvdf(TfLiteContext* context, TfLiteNode* node); -// This is the most generic TfLiteRegistration. The actual supported types may -// still be target dependent. 
The only requirement is that every implementation -// (reference or optimized) must define this function. -TfLiteRegistration Register_SVDF(); +// This is the most generic TfLiteRegistration_V1. The actual supported types +// may still be target dependent. The only requirement is that every +// implementation (reference or optimized) must define this function. +TfLiteRegistration_V1 Register_SVDF(); #if defined(HEXAGON) || defined(CMSIS_NN) -TfLiteRegistration Register_SVDF_INT8(); +TfLiteRegistration_V1 Register_SVDF_INT8(); #else // Note that while this block gets used for both reference and optimized kernels @@ -91,7 +91,7 @@ TfLiteRegistration Register_SVDF_INT8(); // define fallback implementation that allow reference kernels to still be used // from applications that call a more specific kernel variant. -inline TfLiteRegistration Register_SVDF_INT8() { return Register_SVDF(); } +inline TfLiteRegistration_V1 Register_SVDF_INT8() { return Register_SVDF(); } #endif } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/svdf_common.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/svdf_common.cc index ed74358bc..fb92b4fd7 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/svdf_common.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/svdf_common.cc @@ -451,21 +451,19 @@ TfLiteStatus PrepareSvdf(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8); - const double effective_scale_1 = - static_cast(input->params.scale) * - static_cast(weights_feature->params.scale) / - static_cast(activation_state->params.scale); + const double effective_scale_1 = static_cast( + input->params.scale * weights_feature->params.scale / + activation_state->params.scale); const double effective_scale_2 = - static_cast(activation_state->params.scale) * - static_cast(weights_time->params.scale) / - static_cast(output->params.scale); + static_cast(activation_state->params.scale * + weights_time->params.scale / output->params.scale); // TODO(b/162018098): Use TF_LITE_ENSURE_NEAR when it is ready. TF_LITE_ENSURE( context, std::abs(static_cast(bias->params.scale) - - (static_cast(activation_state->params.scale) * - static_cast(weights_time->params.scale))) < 1e-5); + static_cast(activation_state->params.scale * + weights_time->params.scale)) < 1e-5); QuantizeMultiplier(effective_scale_1, &(data->effective_scale_1_a), &(data->effective_scale_1_b)); diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/tanh.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/tanh.cc index e10399307..060cb38cc 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/tanh.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/tanh.cc @@ -1,4 +1,4 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -28,10 +28,9 @@ limitations under the License. 
#include "tensorflow/lite/micro/micro_utils.h" namespace tflite { -namespace ops { -namespace micro { -namespace activations { + namespace { + constexpr int kInputTensor = 0; constexpr int kOutputTensor = 0; @@ -148,8 +147,6 @@ TfLiteStatus TanhPrepare(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -} // namespace - TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) { const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, kInputTensor); @@ -193,12 +190,10 @@ TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) { } } -} // namespace activations +} // namespace -TfLiteRegistration Register_TANH() { - return tflite::micro::RegisterOp( - activations::TanhInit, activations::TanhPrepare, activations::TanhEval); +TfLiteRegistration_V1 Register_TANH() { + return tflite::micro::RegisterOp(TanhInit, TanhPrepare, TanhEval); } -} // namespace micro -} // namespace ops + } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/BUILD b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/BUILD index e7187ef0d..12ddd6934 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/BUILD +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/BUILD @@ -1,3 +1,5 @@ +load("@tflm_pip_deps//:requirements.bzl", "requirement") + package( default_visibility = ["//tensorflow/lite/micro/kernels:__pkg__"], # Disabling layering_check because of http://b/177257332 @@ -15,3 +17,48 @@ cc_library( hdrs = ["conv_test_data.h"], deps = ["//tensorflow/lite/c:common"], ) + +cc_library( + name = "lstm_test_data", + srcs = ["lstm_test_data.cc"], + hdrs = [ + "lstm_test_data.h", + ], + deps = [ + "//tensorflow/lite/c:common", + "//tensorflow/lite/micro:test_helpers", + "//tensorflow/lite/micro/kernels:lstm_shared", + ], +) + +#################################### +# Python +#################################### +py_binary( + name = "lstm_test_data_generator", + srcs = [ + "lstm_test_data_generator.py", + "lstm_test_data_utils.py", + ], + srcs_version = "PY3", + deps = [ + "@absl_py//absl:app", + requirement("numpy"), + requirement("tensorflow-cpu"), + ], +) + +py_test( + name = "lstm_test_data_generator_test", + srcs = ["lstm_test_data_generator_test.py"], + main = "lstm_test_data_generator_test.py", + python_version = "PY3", + tags = [ + "noasan", + "nomsan", # Python doesn't like these symbols from interpreter_wrapper_pybind.so + "noubsan", + ], + deps = [ + ":lstm_test_data_generator", + ], +) diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.cc new file mode 100644 index 000000000..4d7d9d9ed --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.cc @@ -0,0 +1,309 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/lite/micro/kernels/testdata/lstm_test_data.h" + +#include + +namespace tflite { +namespace testing { + +namespace { +// LSTM internal setting (e.g., nonlinear activation type) +// Only UnidirectionalLSTM is supported now +constexpr TfLiteUnidirectionalSequenceLSTMParams kDefaultBuiltinData = { + /*.activation=*/kTfLiteActTanh, + /*.cell_clip=*/6, + /*.proj_clip=*/3, + /*.time_major=*/false, + /*.asymmetric_quantize_inputs=*/true, + /*diagonal_recurrent_tensors=*/false}; +} // namespace + +GateOutputCheckData<4, 4> Get2X2GateOutputCheckData() { + GateOutputCheckData<4, 4> gate_data; + const float input_data[4] = { + 0.2, 0.3, // batch1 + -0.98, 0.62 // batch2 + }; + std::memcpy(gate_data.input_data, input_data, 4 * sizeof(float)); + + const float hidden_state[4] = { + -0.1, 0.2, // batch1 + -0.3, 0.5 // batch2 + }; + std::memcpy(gate_data.hidden_state, hidden_state, 4 * sizeof(float)); + + const float cell_state[4] = { + -1.3, 6.2, // batch1 + -7.3, 3.5 // batch2 + }; + std::memcpy(gate_data.cell_state, cell_state, 4 * sizeof(float)); + + // Use the forget gate parameters to test small gate outputs + // output = sigmoid(W_i*i+W_h*h+b) = sigmoid([[-10,-10],[-20,-20]][0.2, + // +[[-10,-10],[-20,-20]][-0.1, 0.2]+[1,2]) = sigmoid([-5,-10]) = + // [6.69285092e-03, 4.53978687e-05] (Batch1) + // Similarly, we have [0.93086158 0.9945137 ] for batch 2 + const float expected_forget_gate_output[4] = {6.69285092e-3f, 4.53978687e-5f, + 0.93086158, 0.9945137}; + std::memcpy(gate_data.expected_forget_gate_output, + expected_forget_gate_output, 4 * sizeof(float)); + + // Use the input gate parameters to test small gate outputs + // output = sigmoid(W_i*i+W_h*h+b) = sigmoid([[10,10],[20,20]][0.2, 0.3] + // +[[10,10],[20,20]][-0.1, 0.2]+[-1,-2]) = sigmoid([5,10]) = + // [0.99330715, 0.9999546] + // Similarly, we have [0.06913842 0.0054863 ] for batch 2 + const float expected_input_gate_output[4] = {0.99330715, 0.9999546, + 0.06913842, 0.0054863}; + std::memcpy(gate_data.expected_input_gate_output, expected_input_gate_output, + 4 * sizeof(float)); + + // Use the output gate parameters to test normnal gate outputs + // output = sigmoid(W_i*i+W_h*h+b) = sigmoid([[1,1],[1,1]][0.2, 0.3] + // +[[1,1],[1,1]][-0.1, 0.2]+[0,0]) = sigmoid([0.6,0.6]) = + // [0.6456563062257954, 0.6456563062257954] + // Similarly, we have [[0.46008512 0.46008512]] for batch 2 + const float expected_output_gate_output[4] = { + 0.6456563062257954, 0.6456563062257954, 0.46008512, 0.46008512}; + std::memcpy(gate_data.expected_output_gate_output, + expected_output_gate_output, 4 * sizeof(float)); + + // Use the cell(modulation) gate parameters to tanh output + // output = tanh(W_i*i+W_h*h+b) = tanh([[1,1],[1,1]][0.2, 0.3] + // +[[1,1],[1,1]][-0.1, 0.2]+[0,0]) = tanh([0.6,0.6]) = + // [0.6456563062257954, 0.6456563062257954] + // Similarly, we have [-0.1586485 -0.1586485] for batch 2 + const float expected_cell_gate_output[4] = { + 0.5370495669980353, 0.5370495669980353, -0.1586485, -0.1586485}; + std::memcpy(gate_data.expected_cell_gate_output, expected_cell_gate_output, + 4 * sizeof(float)); + + // Cell = forget_gate*cell + input_gate*cell_gate + // Note -6.80625824 is clipped to -6 + const float expected_updated_cell[4] = {0.52475447, 0.53730665, -6, + 3.47992756}; + std::memcpy(gate_data.expected_updated_cell, expected_updated_cell, + 4 * sizeof(float)); + + // Use the updated cell state to update the hidden state + // 
tanh(expected_updated_cell) * expected_output_gate_output + const float expected_updated_hidden[4] = {0.31079388, 0.3169827, -0.46007947, + 0.45921249}; + std::memcpy(gate_data.expected_updated_hidden, expected_updated_hidden, + 4 * sizeof(float)); + return gate_data; +} + +// TODO(b/253466487): document how the golden values are arrived at +LstmEvalCheckData<12, 4, 12> Get2X2LstmEvalCheckData() { + LstmEvalCheckData<12, 4, 12> eval_data; + const float input_data[12] = { + 0.2, 0.3, 0.2, 0.3, 0.2, 0.3, // batch one + -0.98, 0.62, 0.01, 0.99, 0.49, -0.32 // batch two + }; + std::memcpy(eval_data.input_data, input_data, 12 * sizeof(float)); + + // Initialize hidden state as zeros + const float hidden_state[4] = {}; + std::memcpy(eval_data.hidden_state, hidden_state, 4 * sizeof(float)); + + // The expected model output after 3 time steps using the fixed input and + // parameters + const float expected_output[12] = { + 0.26455893, 0.26870455, 0.47935803, + 0.47937014, 0.58013272, 0.58013278, // batch1 + -1.41184672e-3f, -1.43329117e-5f, 0.46887168, + 0.46891281, 0.50054074, 0.50054148 // batch2 + }; + std::memcpy(eval_data.expected_output, expected_output, 12 * sizeof(float)); + + const float expected_hidden_state[4] = { + 0.58013272, 0.58013278, // batch1 + 0.50054074, 0.50054148 // batch2 + }; + std::memcpy(eval_data.expected_hidden_state, expected_hidden_state, + 4 * sizeof(float)); + + const float expected_cell_state[4] = { + 0.89740515, 0.8974053, // batch1 + 0.80327607, 0.80327785 // batch2 + }; + std::memcpy(eval_data.expected_cell_state, expected_cell_state, + 4 * sizeof(float)); + return eval_data; +} + +LstmNodeContent +Create2x3x2X2FloatNodeContents(const float* input_data, + const float* hidden_state_data, + const float* cell_state_data) { + // Parameters for different gates + // negative large weights for forget gate to make it really forget + const GateData forget_gate_data = { + /*.activation_weight=*/{-10, -10, -20, -20}, + /*.recurrent_weight=*/{-10, -10, -20, -20}, + /*.fused_bias=*/{1, 2}, + /*activation_zp_folded_bias=*/{0, 0}, + /*recurrent_zp_folded_bias=*/{0, 0}}; + // positive large weights for input gate to make it really remember + const GateData input_gate_data = { + /*.activation_weight=*/{10, 10, 20, 20}, + /*.recurrent_weight=*/{10, 10, 20, 20}, + /*.fused_bias=*/{-1, -2}, + /*activation_zp_folded_bias=*/{0, 0}, + /*recurrent_zp_folded_bias=*/{0, 0}}; + // all ones to test the behavior of tanh at normal range (-1,1) + const GateData cell_gate_data = { + /*.activation_weight=*/{1, 1, 1, 1}, + /*.recurrent_weight=*/{1, 1, 1, 1}, + /*.fused_bias=*/{0, 0}, + /*activation_zp_folded_bias=*/{0, 0}, + /*recurrent_zp_folded_bias=*/{0, 0}}; + // all ones to test the behavior of sigmoid at normal range (-1. 
1) + const GateData output_gate_data = { + /*.activation_weight=*/{1, 1, 1, 1}, + /*.recurrent_weight=*/{1, 1, 1, 1}, + /*.fused_bias=*/{0, 0}, + /*activation_zp_folded_bias=*/{0, 0}, + /*recurrent_zp_folded_bias=*/{0, 0}}; + + LstmNodeContent float_node_contents( + kDefaultBuiltinData, forget_gate_data, input_gate_data, cell_gate_data, + output_gate_data); + + if (input_data != nullptr) { + float_node_contents.SetInputData(input_data); + } + if (hidden_state_data != nullptr) { + float_node_contents.SetHiddenStateData(hidden_state_data); + } + if (cell_state_data != nullptr) { + float_node_contents.SetCellStateData(cell_state_data); + } + return float_node_contents; +} + +NodeQuantizationParameters Get2X2Int8LstmQuantizationSettings() { + NodeQuantizationParameters quantization_settings; + quantization_settings.activation_type = kTfLiteInt8; + quantization_settings.weight_type = kTfLiteInt8; + quantization_settings.cell_type = kTfLiteInt16; + quantization_settings.bias_type = kTfLiteInt32; + quantization_settings.nonlinear_activation_input_scale = + 0.00024414062; // std::pow(2.0f, -12.0f) + quantization_settings.nonlinear_activation_output_scale = + 0.00003051757; // std::pow(2.0f, -15.0f) + + // state quantization parameters + quantization_settings.input = {/*scale=*/0.00784313725490196, /*zp=*/0, + /*symmetry=*/false}; + quantization_settings.output = {/*scale=*/0.004705882165580988, /*zp=*/-21, + /*symmetry=*/false}; + quantization_settings.hidden_state = {/*scale=*/0.004705882165580988, + /*zp=*/-21, /*symmetry=*/false}; + quantization_settings.cell_state = {/*scale=*/0.00024414062, /*zp=*/0, + /*symmetry=*/true}; + + // gate quantization parameters + quantization_settings.forget_gate = { + {/*scale=*/0.15748031496062992, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.15748031496062992, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.0012351397251814111, /*zp=*/0, /*symmetry=*/true}}; + quantization_settings.input_gate = { + {/*scale=*/0.15748031496062992, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.15748031496062992, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.0012351397251814111, /*zp=*/0, /*symmetry=*/true}}; + quantization_settings.cell_gate = { + {/*scale=*/0.007874015748031496, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.007874015748031496, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/6.175698625907056e-5, /*zp=*/0, /*symmetry=*/true}}; + quantization_settings.output_gate = { + {/*scale=*/0.007874015748031496, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.007874015748031496, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/6.175698625907056e-5, /*zp=*/0, /*symmetry=*/true}}; + return quantization_settings; +} + +NodeQuantizationParameters Get2X2Int16LstmQuantizationSettings() { + NodeQuantizationParameters quantization_settings; + quantization_settings.activation_type = kTfLiteInt16; + quantization_settings.weight_type = kTfLiteInt8; + quantization_settings.cell_type = kTfLiteInt16; + quantization_settings.bias_type = kTfLiteInt64; + quantization_settings.nonlinear_activation_input_scale = + 0.00024414062; // std::pow(2.0f, -12.0f) + quantization_settings.nonlinear_activation_output_scale = + 0.00003051757; // std::pow(2.0f, -15.0f) + + // state quantization parameters + quantization_settings.input = {/*scale=*/3.0518044e-5, /*zp=*/0, + /*symmetry=*/false}; + quantization_settings.output = {/*scale=*/1.8310826e-5, /*zp=*/-5461, + /*symmetry=*/false}; + quantization_settings.hidden_state = {/*scale=*/1.8310826e-5, /*zp=*/-5461, + /*symmetry=*/false}; + quantization_settings.cell_state = 
{/*scale=*/0.00024414062, /*zp=*/0, + /*symmetry=*/true}; + + // gate quantization parameters + quantization_settings.forget_gate = { + {/*scale=*/0.15748031496062992, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.15748031496062992, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/4.8059911474468205e-06, /*zp=*/0, /*symmetry=*/true}}; + quantization_settings.input_gate = { + {/*scale=*/0.15748031496062992, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.15748031496062992, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/4.8059911474468205e-06, /*zp=*/0, /*symmetry=*/true}}; + quantization_settings.cell_gate = { + {/*scale=*/0.007874015748031496, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.007874015748031496, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/2.40299557372341e-07, /*zp=*/0, /*symmetry=*/true}}; + quantization_settings.output_gate = { + {/*scale=*/0.007874015748031496, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.007874015748031496, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/2.40299557372341e-07, /*zp=*/0, /*symmetry=*/true}}; + return quantization_settings; +} + +LstmNodeContent +Create2x3x2X2Int8NodeContents(const float* input_data, + const float* hidden_state, + const float* cell_state) { + auto float_node_content = + Create2x3x2X2FloatNodeContents(input_data, hidden_state, cell_state); + const auto quantization_settings = Get2X2Int8LstmQuantizationSettings(); + return CreateIntegerNodeContents(quantization_settings, + /*fold_zero_point=*/true, + float_node_content); +} + +LstmNodeContent +Create2x3x2X2Int16NodeContents(const float* input_data, + const float* hidden_state, + const float* cell_state) { + auto float_node_content = + Create2x3x2X2FloatNodeContents(input_data, hidden_state, cell_state); + const auto quantization_settings = Get2X2Int16LstmQuantizationSettings(); + return CreateIntegerNodeContents(quantization_settings, + /*fold_zero_point=*/false, + float_node_content); +} + +} // namespace testing +} // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.h new file mode 100644 index 000000000..3edf4200a --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.h @@ -0,0 +1,579 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_TESTDATA_LSTM_TEST_DATA_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_TESTDATA_LSTM_TEST_DATA_H_ +#include + +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" +#include "tensorflow/lite/micro/kernels/lstm_shared.h" +#include "tensorflow/lite/micro/test_helpers.h" + +namespace tflite { +namespace testing { +// Data structure to store all the data used to check output of internal gates +// of one time step +// input_size = batch_size*input_dimension (size of the input array) +// gate_output_size = batch_size*state_dimension (size of the gate output) +template +struct GateOutputCheckData { + float input_data[input_size]; + float hidden_state[gate_output_size]; + float cell_state[gate_output_size]; + float expected_forget_gate_output[gate_output_size]; + float expected_input_gate_output[gate_output_size]; + float expected_output_gate_output[gate_output_size]; + float expected_cell_gate_output[gate_output_size]; + float expected_updated_cell[gate_output_size]; + float expected_updated_hidden[gate_output_size]; +}; + +// Data structure to store all the data used to check the output of the kernel +// of multiple batch, multiple timesteps +// input_size = batch_size*time_steps*input_dimension (size of the input array) +// gate_output_size = batch_size*state_dimension (size of the gate output) +// output_size = time_steps*gate_output_size (size of the output from the +// kernel) +template +struct LstmEvalCheckData { + float input_data[input_size]; + float hidden_state[gate_output_size]; + float expected_output[output_size]; + float expected_hidden_state[gate_output_size]; + float expected_cell_state[gate_output_size]; +}; + +// Struct that holds the weight/bias information for a standard gate (i.e. no +// modification such as layer normalization, peephole, etc.) +// Every gate is defined by the type and size of the weights (bias included) +// inside. +// Specifically, types are weight type and bias type (normally the same +// type of MatMul accumulator). +// activation_weight has shape (hidden state dimension * input tensor dimension) +// recurrent_weight has shape (hidden state dimension * hidden state dimension) +// bias has shape (hidden state dimension, 1) +template +struct GateData { + WeightType activation_weight[state_dimension * input_dimension]; + WeightType recurrent_weight[state_dimension * state_dimension]; + BiasType fused_bias[state_dimension]; + // Quantized model folded the zero point of activations into biases: + // bias + zero_point * weight. + // Note: folded bias is only required for the legacy 8x8->16 pass. 
Therefore + // the data type is fixed here to avoid compilation errors (the computation of + // folding does not support other types) + int32_t activation_zp_folded_bias[state_dimension]; + int32_t recurrent_zp_folded_bias[state_dimension]; +}; + +// A struct that holds quantization parameters for a LSTM Tensor +struct TensorQuantizationParameters { + double scale; + int zero_point; + bool symmetry; +}; + +// A struct that holds quantization parameters for an internal gate, which is +// defined by activation/recurrent weight and bias (assuming no internal layer +// normalization) +struct GateQuantizationParameters { + TensorQuantizationParameters activation_weight; + TensorQuantizationParameters recurrent_weight; + TensorQuantizationParameters bias; +}; + +// A struct that holds the quantization settings for the LSTM node. Data +// members can be grouped into five parts. +// 1. Data types (activation,weight, cell, bias) +// 2. Non-linear activation (i.e., tanh and sigmoid) fixed point +// calculation settings +// 3. Input/output tensor quantization settings +// 4. Internal state (hidden and cell) quantization settings +// 5. Internal gate (forget, input, cell, output) settings +struct NodeQuantizationParameters { + TfLiteType activation_type; + TfLiteType weight_type; + TfLiteType cell_type; + TfLiteType bias_type; + // Fixed point setting for integer nonlinear activation calculation + double nonlinear_activation_input_scale; + double nonlinear_activation_output_scale; + // Quantization parameters for input/output + TensorQuantizationParameters input; + TensorQuantizationParameters output; + // Quantization parameters for internal states + TensorQuantizationParameters hidden_state; + TensorQuantizationParameters cell_state; + // Quantization parameters for gates + GateQuantizationParameters forget_gate; + GateQuantizationParameters input_gate; + GateQuantizationParameters cell_gate; + GateQuantizationParameters output_gate; +}; + +// Data structure that holds all the information to evaluate a LSTM kernel +// (mimic the LSTM node). +// Tensor Types: +// ActivationType defines the data type of input/output of the layer. The hidden +// state has the ActivationType as well since it is the layer output of the +// previous time. +// WeightType defines the weight data type inside the internal gates. +// BiasType defines the bias data type inside the internal gates. (normally the +// same type of MatMul accumulator). +// Tensor Shapes: +// The input to the layer has shape (batch_size,time_steps,input_dimension). +// Both the hidden state and cell state has shape (state_dimension, 1) +// The output of the layer has shape (batch_size,time_steps,state_dimension) +// Note: state values can change through calls (stateful) +template +class LstmNodeContent { + public: + LstmNodeContent(const LstmNodeContent& other) = default; + LstmNodeContent& operator=(const LstmNodeContent& other) = default; + // Use the general model setting (builtin data) and the four gates data to + // construct the node content. 
Note the input, hidden state, and cell state + // data is provided later for flexible testing (initialize as zero now) + LstmNodeContent( + const TfLiteUnidirectionalSequenceLSTMParams builtin_data, + const GateData + forget_gate_params, + const GateData + input_gate_params, + const GateData + cell_gate_params, + const GateData + output_gate_params) + : builtin_data_(builtin_data), + forget_gate_data_(forget_gate_params), + input_gate_data_(input_gate_params), + cell_gate_data_(cell_gate_params), + output_gate_data_(output_gate_params) { + InitializeTensors(); + } + + // Add quantization parameters (scale, zero point) to tensors + // Only required for the integer kernel + void AddQuantizationParameters( + const NodeQuantizationParameters& quantization_params) { + quantization_settings_ = quantization_params; + // Input Tensor + SetTensorQuantizationParam(kLstmInputTensor, quantization_params.input); + // Forget Gate Tensors + const auto& forget_gate_quant_param = quantization_params.forget_gate; + SetTensorQuantizationParam(kLstmInputToForgetWeightsTensor, + forget_gate_quant_param.activation_weight); + SetTensorQuantizationParam(kLstmRecurrentToForgetWeightsTensor, + forget_gate_quant_param.recurrent_weight); + SetTensorQuantizationParam(kLstmForgetGateBiasTensor, + forget_gate_quant_param.bias); + // Input Gate Tensors + const auto& input_gate_quant_param = quantization_params.input_gate; + SetTensorQuantizationParam(kLstmInputToInputWeightsTensor, + input_gate_quant_param.activation_weight); + SetTensorQuantizationParam(kLstmRecurrentToInputWeightsTensor, + input_gate_quant_param.recurrent_weight); + SetTensorQuantizationParam(kLstmInputGateBiasTensor, + input_gate_quant_param.bias); + // Cell Gate Tensors + const auto& cell_gate_quant_param = quantization_params.cell_gate; + SetTensorQuantizationParam(kLstmInputToCellWeightsTensor, + cell_gate_quant_param.activation_weight); + SetTensorQuantizationParam(kLstmRecurrentToCellWeightsTensor, + cell_gate_quant_param.recurrent_weight); + SetTensorQuantizationParam(kLstmCellGateBiasTensor, + cell_gate_quant_param.bias); + // Output Gate Tensors + const auto& output_gate_quant_param = quantization_params.output_gate; + SetTensorQuantizationParam(kLstmInputToOutputWeightsTensor, + output_gate_quant_param.activation_weight); + SetTensorQuantizationParam(kLstmRecurrentToOutputWeightsTensor, + output_gate_quant_param.recurrent_weight); + SetTensorQuantizationParam(kLstmOutputGateBiasTensor, + output_gate_quant_param.bias); + // State Tensors + SetTensorQuantizationParam(kLstmOutputStateTensor, + quantization_params.hidden_state); + SetTensorQuantizationParam(kLstmCellStateTensor, + quantization_params.cell_state); + // Output Tensor + SetTensorQuantizationParam(24, quantization_params.output); + } + + // Provide interface to set the input tensor values for flexible testing + void SetInputData(const ActivationType* data) { + std::memcpy( + input_, data, + batch_size * input_dimension * time_steps * sizeof(ActivationType)); + SetTensor(kLstmInputTensor, input_, input_size_); + } + const ActivationType* GetInputData() const { return input_; } + + // Provide interface to set the hidden state tensor values for flexible + // testing + void SetHiddenStateData(const ActivationType* data) { + std::memcpy(hidden_state_, data, + batch_size * state_dimension * sizeof(ActivationType)); + } + ActivationType* GetHiddenStateData() { return hidden_state_; } + + // Provide interface to set the cell state tensor values for flexible + // testing + void 
SetCellStateData(const CellType* data) { + std::memcpy(cell_state_, data, + batch_size * state_dimension * sizeof(CellType)); + } + CellType* GetCellStateData() { return cell_state_; } + ActivationType* GetOutputData() { return output_; } + + // Internal tensors, see lstm_shared.h for tensor names + TfLiteEvalTensor* GetEvalTensor(const int tensor_index) { + auto valid_index = input_tensor_indices_[tensor_index + 1]; + if (valid_index < 0) { + return nullptr; + } + return &eval_tensors_[tensor_index]; + } + + TfLiteTensor* GetTensors() { return tensors_; } + + // Required by the kernel runner + TfLiteIntArray* KernelInputs() { + return IntArrayFromInts(input_tensor_indices_); + } + // Required by the kernel runner + TfLiteIntArray* KernelOutputs() { + return IntArrayFromInts(output_tensor_indices_); + } + + // Variable tensors (will be changed, can not be const) + TfLiteEvalTensor* HiddenStateEvalTensor() { + return &eval_tensors_[kLstmOutputStateTensor]; + } + TfLiteEvalTensor* CellStateEvalTensor() { + return &eval_tensors_[kLstmCellStateTensor]; + } + TfLiteEvalTensor* OutputEvalTensor() { return &eval_tensors_[24]; } + + const GateData& + ForgetGateData() const { + return forget_gate_data_; + } + const GateData& + InputGateData() const { + return input_gate_data_; + } + const GateData& + CellGateData() const { + return cell_gate_data_; + } + const GateData& + OutputGateData() const { + return output_gate_data_; + } + + const TfLiteUnidirectionalSequenceLSTMParams& BuiltinData() const { + return builtin_data_; + } + + const NodeQuantizationParameters& QuantizationSettings() const { + return quantization_settings_; + } + + private: + void InitializeTensors() { + // Invalid all the input tensors untill we set it + input_tensor_indices_[0] = 24; // tot elements + for (size_t i = 1; i < 25; i++) { + input_tensor_indices_[i] = kTfLiteOptionalTensor; + } + // Input Tensor + SetTensor(kLstmInputTensor, input_, input_size_); + // Forget Gate Tensors + SetTensor(kLstmInputToForgetWeightsTensor, + forget_gate_data_.activation_weight, activation_weight_size_); + SetTensor(kLstmRecurrentToForgetWeightsTensor, + forget_gate_data_.recurrent_weight, recurrent_weight_size_); + SetTensor(kLstmForgetGateBiasTensor, forget_gate_data_.fused_bias, + bias_size_); + // Input Gate Tensors + SetTensor(kLstmInputToInputWeightsTensor, + input_gate_data_.activation_weight, activation_weight_size_); + SetTensor(kLstmRecurrentToInputWeightsTensor, + input_gate_data_.recurrent_weight, recurrent_weight_size_); + SetTensor(kLstmInputGateBiasTensor, input_gate_data_.fused_bias, + bias_size_); + // Cell Gate Tensors + SetTensor(kLstmInputToCellWeightsTensor, cell_gate_data_.activation_weight, + activation_weight_size_); + SetTensor(kLstmRecurrentToCellWeightsTensor, + cell_gate_data_.recurrent_weight, recurrent_weight_size_); + SetTensor(kLstmCellGateBiasTensor, cell_gate_data_.fused_bias, bias_size_); + // Output Gate Tensors + SetTensor(kLstmInputToOutputWeightsTensor, + output_gate_data_.activation_weight, activation_weight_size_); + SetTensor(kLstmRecurrentToOutputWeightsTensor, + output_gate_data_.recurrent_weight, recurrent_weight_size_); + SetTensor(kLstmOutputGateBiasTensor, output_gate_data_.fused_bias, + bias_size_); + // State Tensors + SetTensor(kLstmOutputStateTensor, hidden_state_, state_size_, + /*is_variable=*/true); + SetTensor(kLstmCellStateTensor, cell_state_, state_size_, + /*is_variable=*/true); + // // Output Tensor + SetTensor(24, output_, output_size_, /*is_variable=*/true); + } + + template + 
void SetTensor(const int index, const T* data, int* dims, + const bool is_variable = false) { + // Lite tensors for kernel level testing + tensors_[index].data.data = const_cast(data); + tensors_[index].dims = IntArrayFromInts(dims); + tensors_[index].type = typeToTfLiteType(); + tensors_[index].is_variable = is_variable; + // Eval tensors for internal computation testing + eval_tensors_[index].data.data = const_cast(data); + eval_tensors_[index].dims = IntArrayFromInts(dims); + eval_tensors_[index].type = typeToTfLiteType(); + // update the index + if (index < 24) { + input_tensor_indices_[index + 1] = index; + } + } + + void SetTensorQuantizationParam( + const int index, const TensorQuantizationParameters& quant_param) { + tensors_[index].params.scale = quant_param.scale; + tensors_[index].params.zero_point = quant_param.zero_point; + } + + const TfLiteUnidirectionalSequenceLSTMParams builtin_data_; + GateData + forget_gate_data_; + GateData + input_gate_data_; + GateData + cell_gate_data_; + GateData + output_gate_data_; + + // Keep to ease the testing process (although all quantization information can + // be obtained from individual tensors, they are well organized here and light + // weighted) + NodeQuantizationParameters quantization_settings_; + + // Not const since IntArrayFromInts takes int *; the first element of the + // array must be the size of the array + int input_size_[4] = {3, batch_size, time_steps, input_dimension}; + int output_size_[4] = {3, batch_size, time_steps, state_dimension}; + // weight tensor has C-style "row-major" memory ordering + int activation_weight_size_[3] = {2, state_dimension, input_dimension}; + int recurrent_weight_size_[3] = {2, state_dimension, state_dimension}; + int bias_size_[2] = {1, state_dimension}; + int state_size_[3] = {2, batch_size, state_dimension}; + + // see lstm_shared.h for tensor names, the last tensor is the output tensor + TfLiteTensor tensors_[24 + 1]; + // Use for internel kernel testing + TfLiteEvalTensor eval_tensors_[24 + 1]; + // indices for the tensors inside the node (required by kernel runner) + int input_tensor_indices_[1 + 24] = {}; + // single output (last in the tensors array) + int output_tensor_indices_[2] = {1, 24}; + + // tennsor data + // states are initialized to zero + ActivationType hidden_state_[batch_size * state_dimension] = {}; + CellType cell_state_[batch_size * state_dimension] = {}; + // input is defined in the ModelContent (const across all derived models) + ActivationType input_[batch_size * input_dimension * time_steps] = {}; + ActivationType output_[batch_size * state_dimension * time_steps] = {}; +}; + +// Converts floating point gate parameters to the corresponding quantized +// version +template +GateData +CreateQuantizedGateData( + const GateData& + gate_parameters, + const TensorQuantizationParameters& input_quantization_params, + const TensorQuantizationParameters& output_quantization_params, + const GateQuantizationParameters& gate_quantization_params, + const bool fold_zero_point) { + GateData + quantized_gate_params; + tflite::SymmetricQuantize(gate_parameters.activation_weight, + quantized_gate_params.activation_weight, + state_dimension * input_dimension, + gate_quantization_params.activation_weight.scale); + tflite::SymmetricQuantize(gate_parameters.recurrent_weight, + quantized_gate_params.recurrent_weight, + state_dimension * state_dimension, + gate_quantization_params.recurrent_weight.scale); + tflite::SymmetricQuantize(gate_parameters.fused_bias, + 
quantized_gate_params.fused_bias, state_dimension, + gate_quantization_params.bias.scale); + // Note: steps below are not required for the generalized LSTM evaluation + // (e.g., 16bits activation) + if (fold_zero_point) { + // Copy the bias values to prepare zero_point folded + // bias precomputation. bias has same scale as + // input_scale*input_weight_scale) + std::memcpy(quantized_gate_params.activation_zp_folded_bias, + quantized_gate_params.fused_bias, 2 * sizeof(int32_t)); + // Pre-calculate bias - zero_point * weight (a constant). + tflite::tensor_utils::MatrixScalarMultiplyAccumulate( + quantized_gate_params.activation_weight, + -1 * input_quantization_params.zero_point, 2, 2, + quantized_gate_params.activation_zp_folded_bias); + + // Initialize the folded bias to zeros for accumulation + for (size_t i = 0; i < 2; i++) { + quantized_gate_params.recurrent_zp_folded_bias[i] = 0; + } + // Calculate : -zero_point * weight since it is a constant + tflite::tensor_utils::MatrixScalarMultiplyAccumulate( + quantized_gate_params.recurrent_weight, + -1 * output_quantization_params.zero_point, 2, 2, + quantized_gate_params.recurrent_zp_folded_bias); + } + return quantized_gate_params; +} + +// Create integer LSTM node content from the float node contents and +// quantization settings +// Note: fold_zero_point folds the zero point into the bias (precomputation), +// which is not required for the generalized integer inference (16 bits act +// LSTM). +template +LstmNodeContent +CreateIntegerNodeContents( + const NodeQuantizationParameters& quantization_settings, + const bool fold_zero_point, + LstmNodeContent& float_node_contents) { + const auto quantized_forget_gate_data = + CreateQuantizedGateData( + float_node_contents.ForgetGateData(), quantization_settings.input, + quantization_settings.output, quantization_settings.forget_gate, + fold_zero_point); + const auto quantized_input_gate_data = + CreateQuantizedGateData( + float_node_contents.InputGateData(), quantization_settings.input, + quantization_settings.output, quantization_settings.input_gate, + fold_zero_point); + const auto quantized_cell_gate_data = + CreateQuantizedGateData( + float_node_contents.CellGateData(), quantization_settings.input, + quantization_settings.output, quantization_settings.cell_gate, + fold_zero_point); + const auto quantized_output_gate_params = + CreateQuantizedGateData( + float_node_contents.OutputGateData(), quantization_settings.input, + quantization_settings.output, quantization_settings.output_gate, + fold_zero_point); + LstmNodeContent + quantized_node_content( + float_node_contents.BuiltinData(), quantized_forget_gate_data, + quantized_input_gate_data, quantized_cell_gate_data, + quantized_output_gate_params); + + // Quantize the floating point input + ActivationType quantized_input[batch_size * input_dimension * time_steps] = + {}; + Quantize(float_node_contents.GetInputData(), quantized_input, + batch_size * input_dimension * time_steps, + quantization_settings.input.scale, + quantization_settings.input.zero_point); + quantized_node_content.SetInputData(quantized_input); + // Quantize the floating point hidden state + ActivationType quantized_hidden_state[batch_size * state_dimension] = {}; + Quantize(float_node_contents.GetHiddenStateData(), quantized_hidden_state, + batch_size * state_dimension, + quantization_settings.hidden_state.scale, + quantization_settings.hidden_state.zero_point); + quantized_node_content.SetHiddenStateData(quantized_hidden_state); + // Quantize the floating point cell state + 
CellType quantized_cell_state[batch_size * state_dimension] = {}; + Quantize(float_node_contents.GetCellStateData(), quantized_cell_state, + batch_size * state_dimension, quantization_settings.cell_state.scale, + quantization_settings.cell_state.zero_point); + quantized_node_content.SetCellStateData(quantized_cell_state); + + // Add scale and zero point to tensors + quantized_node_content.AddQuantizationParameters(quantization_settings); + return quantized_node_content; +} + +// Get the gate output data (one time step) for a simple 2X2 model +// batch_size = 2; time_steps = 1; input_dimension = 2; state_dimension = 2 +// input_size = batch_size*time_steps*input_dimension = 4 +// gate_output_size = batch_size*state_dimension = 4 +GateOutputCheckData<4, 4> Get2X2GateOutputCheckData(); + +// Get the kernel output data for a simple 2X2 model +// batch_size = 2; time_steps = 3; input_dimension = 2; state_dimension = 2 +// input_size = batch_size*time_steps*input_dimension = 12 +// gate_output_size = batch_size*state_dimension = 4 +// output_size = time_steps*gate_output_size = 12 +LstmEvalCheckData<12, 4, 12> Get2X2LstmEvalCheckData(); + +// Create a 2x2 float node content +// batch_size = 2; time_steps = 3; input_dimension = 2; state_dimension = 2 +LstmNodeContent +Create2x3x2X2FloatNodeContents(const float* input_data = nullptr, + const float* hidden_state = nullptr, + const float* cell_state = nullptr); + +// Get the quantization settings for the 2X2 model +NodeQuantizationParameters Get2X2Int8LstmQuantizationSettings(); + +// Create int8 (activation) x int8 (weight) -> int16 (cell) node +// batch_size = 2; time_steps = 3; input_dimension = 2; state_dimension = 2 +// input is in float format since the source of truth is always the float +// configuration +LstmNodeContent +Create2x3x2X2Int8NodeContents(const float* input_data = nullptr, + const float* hidden_state = nullptr, + const float* cell_state = nullptr); + +// Create int16 (activation) x int8 (weight) -> int16 (cell) node +// batch_size = 2; time_steps = 3; input_dimension = 2; state_dimension = 2 +// input is in float format since the source of truth is always the float +// configuration +LstmNodeContent +Create2x3x2X2Int16NodeContents(const float* input_data = nullptr, + const float* hidden_state = nullptr, + const float* cell_state = nullptr); + +} // namespace testing +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_TESTDATA_LSTM_TEST_DATA_H_ diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data_generator.py b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data_generator.py new file mode 100644 index 000000000..97c8798ef --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data_generator.py @@ -0,0 +1,192 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# =============================================================================
+""" Generate the LSTM kernel test data settings in lstm_test_data.cc
+1. Print the quantization settings for the test model (Get2X2Int8LstmQuantizationSettings in .cc)
+2. Print the intermediate step outputs inside the LSTM for a single step LSTM invocation (Get2X2GateOutputCheckData in .cc)
+3. Print the outputs for multi-step LSTM invocation (Get2X2LstmEvalCheckData in .cc)
+
+Every invocation gives three types of information:
+1. Quantized output: kernel output in integer
+2. Dequantized output: quantized output in floating point representation
+3. Float output: output from the floating point computation (i.e., the float kernel)
+
+Note:
+1. Change the quantization settings in _KERNEL_CONFIG to see the outcomes from various quantization schemes (e.g., 8x8 vs. 16x8)
+2. Only single batch inference is supported here. Change _GATE_TEST_DATA or _MULTISTEP_TEST_DATA to see kernel outputs on different input data
+3. The quantization computation here is not exactly the same as the C++ implementation. The integer calculation is mimicked here using floating point.
+No fixed point math is implemented here. The purpose is to illustrate the computation procedure and possible quantization error accumulation, not bit exactness.
+"""
+from absl import app
+import numpy as np
+
+from tflite_micro.tensorflow.lite.micro.kernels.testdata import lstm_test_data_utils
+
+# Basic kernel information (default: a 2x2 model with int8 quantization)
+# change activation_bits to 16 for the 16x8 case
+_KERNEL_CONFIG = {
+    'quantization_settings': {
+        'weight_bits': 8,
+        'activation_bits': 8,
+        'bias_bits': 32,
+        'cell_bits': 16,
+    },
+    'shape_info': {
+        'input_dim': 2,
+        'state_dim': 2
+    }
+}
+
+# Kernel data setting (weight data for every gate).
Corresponds to Create2x3x2X2FloatNodeContents in .cc +_KERNEL_PARAMETERS = { + 'forget_gate_data': { + 'activation_weight_data': [-10, -10, -20, -20], + 'recurrent_weight_data': [-10, -10, -20, -20], + 'bias_data': [1, 2], + }, + 'input_gate_data': { + 'activation_weight_data': [10, 10, 20, 20], + 'recurrent_weight_data': [10, 10, 20, 20], + 'bias_data': [-1, -2], + }, + 'cell_gate_data': { + 'activation_weight_data': [1, 1, 1, 1], + 'recurrent_weight_data': [1, 1, 1, 1], + 'bias_data': [0, 0], + }, + 'output_gate_data': { + 'activation_weight_data': [1, 1, 1, 1], + 'recurrent_weight_data': [1, 1, 1, 1], + 'bias_data': [0, 0], + }, +} + +# Input and states setting for gate level testing (Get2X2GateOutputCheckData in .cc) +# Only single batch inference is supported (default as batch1 in .cc) +_GATE_TEST_DATA = { + 'init_hidden_state_vals': [-0.1, 0.2], + 'init_cell_state_vals': [-1.3, 6.2], + 'input_data': [0.2, 0.3], + 'hidden_state_range': (-0.5, 0.7), + 'cell_state_range': [-8, 8], + 'input_data_range': [-1, 1] +} + +# Input and states setting for multi-step kernel testing (Get2X2LstmEvalCheckData in .cc) +# Only single batch inference is supported (default as batch1 in .cc) +_MULTISTEP_TEST_DATA = { + 'init_hidden_state_vals': [0, 0], + 'init_cell_state_vals': [0, 0], + 'input_data': [0.2, 0.3, 0.2, 0.3, 0.2, 0.3], # three time steps + 'hidden_state_range': (-0.5, 0.7), + 'cell_state_range': [-8, 8], + 'input_data_range': [-1, 1] +} + + +def print_tensor_quantization_params(tensor_name, tensor): + """Print the tensor quantization information (scale and zero point)""" + print(f"{tensor_name}, scale: {tensor.scale}, zero_point:" + f" {tensor.zero_point}") + + +def print_gate_tensor_params(gate_name, gate): + """Print the quantization information for a gate (input/forget/cell/output gate)""" + print(f"###### Quantization settings for {gate_name} ######") + print_tensor_quantization_params("activation weight", gate.activation_weight) + print_tensor_quantization_params("recurrent weight", gate.activation_weight) + + +def print_quantization_settings(lstm_debugger): + """Print the quantization information for a LSTM kernel""" + print_gate_tensor_params("forget gate", lstm_debugger.forget_gate_params) + print_gate_tensor_params("input gate", lstm_debugger.input_gate_params) + print_gate_tensor_params("cell gate", lstm_debugger.modulation_gate_params) + print_gate_tensor_params("output gate", lstm_debugger.output_gate_params) + print("###### State Tensors ######") + print_tensor_quantization_params("Hidden State Tensor", + lstm_debugger.hidden_state_tensor) + print_tensor_quantization_params("Cell State Tensor", + lstm_debugger.cell_state_tensor) + + +def print_one_step(lstm_debugger): + """Print the intermediate calculation results for one step LSTM invocation (Get2X2GateOutputCheckData in .cc)""" + test_data = np.array(_GATE_TEST_DATA['input_data']).reshape((-1, 1)) + input_data_range = _GATE_TEST_DATA['input_data_range'] + input_tensor = lstm_test_data_utils.assemble_quantized_tensor( + test_data, + input_data_range[0], + input_data_range[1], + symmetry=False, + num_bits=_KERNEL_CONFIG['quantization_settings']['activation_bits']) + lstm_debugger.invoke(input_tensor, debug=True) + + +def print_multi_step(lstm_debugger, debug=False): + """Print the output of every step for multi step LSTM invocation (Get2X2LstmEvalCheckData in .cc)""" + input_data = _MULTISTEP_TEST_DATA['input_data'] + input_data_range = _MULTISTEP_TEST_DATA['input_data_range'] + input_data_size = 
_KERNEL_CONFIG['shape_info']['input_dim'] + input_start_pos = 0 + steps = 0 + while input_start_pos < len(input_data): + one_step_data = np.array(input_data[input_start_pos:input_start_pos + + input_data_size]).reshape((-1, 1)) + input_tensor = lstm_test_data_utils.assemble_quantized_tensor( + one_step_data, + input_data_range[0], + input_data_range[1], + symmetry=False, + num_bits=_KERNEL_CONFIG['quantization_settings']['activation_bits']) + output_quant, output_float = lstm_debugger.invoke(input_tensor, + debug=debug) + print(f"##### Step: {steps} #####") + print(f"Quantized Output: {output_quant.flatten()}") + print( + f"Dequantized Output: {lstm_debugger.hidden_state_tensor.dequantized_data.flatten().flatten()}" + ) + print(f"Float Output: {output_float.flatten()}") + input_start_pos += input_data_size + steps += 1 + + +def main(_): + one_step_lstm_debugger = lstm_test_data_utils.QuantizedLSTMDebugger( + _KERNEL_CONFIG, + _KERNEL_PARAMETERS, + _GATE_TEST_DATA['init_hidden_state_vals'], + _GATE_TEST_DATA['hidden_state_range'], + _GATE_TEST_DATA['init_cell_state_vals'], + _GATE_TEST_DATA['cell_state_range'], + ) + print("========== Quantization Settings for the Test Kernal ========== ") + print_quantization_settings(one_step_lstm_debugger) + print("========== Single Step Invocation Intermediates ========== ") + print_one_step(one_step_lstm_debugger) + + multi_step_lstm_debugger = lstm_test_data_utils.QuantizedLSTMDebugger( + _KERNEL_CONFIG, + _KERNEL_PARAMETERS, + _MULTISTEP_TEST_DATA['init_hidden_state_vals'], + _MULTISTEP_TEST_DATA['hidden_state_range'], + _MULTISTEP_TEST_DATA['init_cell_state_vals'], + _MULTISTEP_TEST_DATA['cell_state_range'], + ) + print("========== Multi Step Invocation Intermediates ========== ") + print_multi_step(multi_step_lstm_debugger) + + +if __name__ == "__main__": + app.run(main) diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data_generator_test.py b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data_generator_test.py new file mode 100644 index 000000000..cb5c21de4 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data_generator_test.py @@ -0,0 +1,108 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= +import numpy as np +import tensorflow as tf + +from tensorflow.python.framework import test_util +from tensorflow.python.platform import test +from tflite_micro.tensorflow.lite.micro.kernels.testdata import lstm_test_data_utils + +_KERNEL_CONFIG = { + 'quantization_settings': { + 'weight_bits': 8, + 'activation_bits': 8, + 'bias_bits': 32, + 'cell_bits': 16, + }, + 'shape_info': { + 'input_dim': 2, + 'state_dim': 2 + } +} + +_KERNEL_PARAMETERS = { + 'forget_gate_data': { + 'activation_weight_data': [1, 1, 1, 1], + 'recurrent_weight_data': [1, 1, 1, 1], + 'bias_data': [0, 0], + }, + 'input_gate_data': { + 'activation_weight_data': [1, 1, 1, 1], + 'recurrent_weight_data': [1, 1, 1, 1], + 'bias_data': [0, 0], + }, + 'cell_gate_data': { + 'activation_weight_data': [1, 1, 1, 1], + 'recurrent_weight_data': [1, 1, 1, 1], + 'bias_data': [0, 0], + }, + 'output_gate_data': { + 'activation_weight_data': [1, 1, 1, 1], + 'recurrent_weight_data': [1, 1, 1, 1], + 'bias_data': [0, 0], + }, +} + +_KERNEL_INITIALIZATION_SETTINGS = { + 'init_hidden_state_vals': [0, 0], + 'init_cell_state_vals': [0, 0], + 'hidden_state_range': (-1, 1), + 'cell_state_range': [-8, 8], +} + + +def create_keras_lstm(stateful=True): + """Create a keras model with LSTM layer only for testing""" + input_layer = tf.keras.layers.Input(shape=(1, 2), batch_size=1, name="input") + lstm_output = tf.keras.layers.LSTM(units=2, + return_sequences=True, + stateful=stateful, + unit_forget_bias=False, + return_state=True, + kernel_initializer="ones", + recurrent_initializer="ones", + bias_initializer="zeros")(input_layer) + return tf.keras.Model(input_layer, lstm_output, name="LSTM") + + +class QuantizedLSTMDebuggerTest(test_util.TensorFlowTestCase): + + # only the float output from the debugger is used to setup the test data in .cc + def testFloatCompareWithKeras(self): + keras_lstm = create_keras_lstm() + lstm_debugger = lstm_test_data_utils.QuantizedLSTMDebugger( + _KERNEL_CONFIG, + _KERNEL_PARAMETERS, + _KERNEL_INITIALIZATION_SETTINGS['init_hidden_state_vals'], + _KERNEL_INITIALIZATION_SETTINGS['hidden_state_range'], + _KERNEL_INITIALIZATION_SETTINGS['init_cell_state_vals'], + _KERNEL_INITIALIZATION_SETTINGS['cell_state_range'], + ) + + num_steps = 20 + for _ in range(num_steps): + # debugger has input shape (input_dim, 1) + test_data = np.random.rand(2, 1) + input_tensor = lstm_test_data_utils.assemble_quantized_tensor( + test_data, -1, 1, False) + _, output_float = lstm_debugger.invoke(input_tensor) + output_keras, _, _ = keras_lstm.predict(test_data.reshape(1, 1, 2)) + + diff = abs(output_float.flatten() - output_keras.flatten()) + self.assertAllLess(diff, 1e-6) + + +if __name__ == "__main__": + test.main() \ No newline at end of file diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data_utils.py b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data_utils.py new file mode 100644 index 000000000..345b143fa --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data_utils.py @@ -0,0 +1,531 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+"""Utilities for lstm_test_data_generator.py that help generate the test data for the LSTM kernel (lstm_test_data.cc)"""
+
+import numpy as np
+from copy import deepcopy
+
+
+def clip_range(vals, bit_width):
+  """Mimic integer calculation.
+  Clip the range of vals based on bit width.
+  e.g., clip_range([300], 8) = [127] since int8 has range [-128, 127]
+  Args:
+    vals (np.array): float representation of the integer values
+    bit_width (int): number of desired bits for vals
+  Returns:
+    np.array : clipped vals
+  """
+  # Numpy integer calculation does not saturate, so saturation is implemented here
+  min_val = -2**(bit_width - 1)
+  max_val = 2**(bit_width - 1) - 1
+  if vals.max() > max_val or vals.min() < min_val:
+    print(f"WARNING: integer overflow!")
+  return np.clip(vals, min_val, max_val)
+
+
+def quantize_data(data, scale, zero_point=0, bit_width=8):
+  """Quantize the data to integer type with desired bit width.
+  The quantized data is represented using float since integer calculation in
+  numpy may differ from other implementations (e.g., no integer saturation
+  protection in numpy)
+  Args:
+    data (np.array): float data
+    scale (float): quantization scale of the data
+    zero_point (integer): quantization zero point of the data
+    bit_width (int): number of representative bits for vals
+  Returns:
+    np.array : quantized data in float but clipped range
+  """
+  vals = np.round(data / scale) + zero_point
+  return clip_range(vals, bit_width)
+
+
+def dequantize_data(quantized_data, scale, zero_point=0):
+  """Dequantize the data back to floating point.
+ Args: + quantized_data (np.array): quantized data + scale (float): quantization scale of the data + zero_point (integer): quantization zero point of the data + Returns: + np.array : dequantized data + """ + return scale * (quantized_data - zero_point) + + +def rescale(data, effective_scale, zero_point, num_bits): + """Rescale the data to the effective scale """ + # q = r/s + z + rescaled = np.round(data * effective_scale) + zero_point + return clip_range(rescaled, num_bits) + + +def calculate_scale(min_val, max_val, num_bits=8, symmetry=False): + """Calculate quantization scale from the range and bit width""" + num_bins = np.power(2, num_bits) - 1 + if symmetry: + return max(abs(min_val), abs(max_val)) / int(num_bins / 2) + return np.array((max_val - min_val) / num_bins, dtype=np.float32) + + +def calculate_zp(min_val, scale, num_bits=8): + """Calculate the zero point from the minimal value""" + quantized_floor = -np.power(2, num_bits) / 2 + return int(quantized_floor - min_val / scale) + + +def sigmoid(x): + """Sigmoid (floating point)""" + return 1 / (1 + np.exp(-x)) + + +def quantized_sigmoid(input, input_scale, output_scale, num_bits=16): + """Sigmoid (interger)""" + float_input = input * input_scale + float_result = sigmoid(float_input) + return quantize_data(float_result, output_scale, bit_width=num_bits) + + +def quantized_tanh(input, input_scale, output_scale, num_bits=16): + """Tanh (interger)""" + float_input = input * input_scale + float_result = np.tanh(float_input) + return quantize_data(float_result, output_scale, bit_width=num_bits) + + +class QuantizedTensor: + """Data structure for a quantized tensor""" + + def __init__(self, float_data, scale, zero_point, symmetry, num_bits=8): + """Tensor is initialized using the floating point data""" + self.float_data = float_data + self.scale = scale + self.zero_point = int(zero_point) + self.symmetry = symmetry + self.num_bits = num_bits + self.quantized_data = quantize_data(float_data, scale, zero_point, + num_bits) + + @property + def dequantized_data(self): + """Dequantize the quantized tensor data back to floating point""" + return dequantize_data(self.quantized_data, self.scale, + self.zero_point).flatten() + + +class QuantizedGateParams: + """Hold the quantization data and corresponding information for a LSTM gate (forget/input/cell/output gate) """ + + def __init__( + self, + quantized_activation_weight, + quantized_recurrent_weight, + bias_data_float, + shape_info, + bias_num_bits=32, + cell_num_bits=16, + modulation=False, + ): + self.shape_info = shape_info + self.activation_weight = quantized_activation_weight + self.recurrent_weight = quantized_recurrent_weight + self.bias_data_float = bias_data_float + self.modulation = modulation + self.bias_num_bits = bias_num_bits + self.cell_num_bits = cell_num_bits + # For INT16 cell state, the input scale is Q3.12 + self.nonlinear_input_scale = np.power(2.0, -(cell_num_bits - 4)) + # For INT16 cell state, the output scale is Q0.15 + self.nonlinear_output_scale = np.power(2.0, -(cell_num_bits - 1)) + + def quantize_bias_data(self, input_scale): + bias_scale = self.activation_weight.scale * input_scale + return quantize_data(self.bias_data_float, bias_scale, 0, + self.bias_num_bits) + + def fold_zeropoint(self, weight, zero_point): + # W*real = W*(quant-zero_pt) = Wquant - Wzero_pt + # Wzero_pt is precomputed here as a constant (implemented in TFLM) + zp_vector = zero_point * np.ones(shape=(self.shape_info['input_dim'], 1)) + zero_folded_vector = np.dot(weight, zp_vector) + return 
-1 * clip_range(zero_folded_vector, self.bias_num_bits) + + def compute_activation_bias(self, input_scale, input_zp): + # Wz is precomputed here and added it to the original bias (same scale) + zero_folded_vector = self.fold_zeropoint( + self.activation_weight.quantized_data, input_zp) + quantized_bias = self.quantize_bias_data(input_scale) + return zero_folded_vector + quantized_bias + + def compute_recurrent_bias(self, recurrent_zp): + # Wz is precomputed here + return self.fold_zeropoint(self.recurrent_weight.quantized_data, + recurrent_zp) + + def effective_activation_scale(self, input_scale): + # Combine input scale with output scale. Used for fc calculation + return (self.activation_weight.scale * input_scale / + self.nonlinear_input_scale) + + def effective_recurrence_scale(self, recurrent_scale): + # Combine input scale with output scale. Used for fc calculation + return (self.recurrent_weight.scale * recurrent_scale / + self.nonlinear_input_scale) + + +def assemble_quantized_tensor(float_data, + min_val, + max_val, + symmetry, + num_bits=8): + """Create a QuantizedTensor using floating point data, range information, and bit width""" + scale = calculate_scale(min_val, max_val, num_bits, symmetry) + zp = 0 + if not symmetry: + zp = calculate_zp(min_val, scale, num_bits) + return QuantizedTensor(float_data, + scale, + zp, + symmetry=symmetry, + num_bits=num_bits) + + +def create_gate_params(gate_parameters, model_config, modulation=False): + """Create a QuantizedGateParams using the gate paramater information and the model configuration""" + shape_info = model_config['shape_info'] + quantization_settings = model_config['quantization_settings'] + + activation_weight_data = np.array( + gate_parameters['activation_weight_data']).reshape( + (shape_info['input_dim'], shape_info['state_dim'])) + activation_weight = assemble_quantized_tensor( + activation_weight_data, + activation_weight_data.min(), + activation_weight_data.max(), + True, + quantization_settings['weight_bits'], + ) + + recurrent_weight_data = np.array( + gate_parameters['recurrent_weight_data']).reshape( + (shape_info['input_dim'], shape_info['state_dim'])) + + recurrent_weight = assemble_quantized_tensor( + recurrent_weight_data, + recurrent_weight_data.min(), + recurrent_weight_data.max(), + True, + quantization_settings['weight_bits'], + ) + + bias_data_float = np.array(gate_parameters['bias_data']).reshape( + (shape_info['input_dim'], 1)) + gate_params = QuantizedGateParams( + activation_weight, + recurrent_weight, + bias_data_float, + shape_info, + bias_num_bits=quantization_settings['bias_bits'], + cell_num_bits=quantization_settings['cell_bits'], + modulation=modulation, + ) + return gate_params + + +def gate_calculation(input, hidden_state, gate_params, debug=False): + """ + A gate calculation is tanh(FC(activation, activation weight) + FC(recurrent, recurrent weight)). + For modulation gate, sigmoid is used instead of tanh. 
+ + Note: for debugging purpose, floating point calculation is conducted in parallel with the integer calculation + """ + # Quantized Version + input_fc = np.dot(gate_params.activation_weight.quantized_data, + input.quantized_data) + input_fc += gate_params.compute_activation_bias(input.scale, + input.zero_point) + input_fc = rescale(input_fc, + gate_params.effective_activation_scale(input.scale), 0, + gate_params.cell_num_bits) + recurrent_fc = np.dot(gate_params.recurrent_weight.quantized_data, + hidden_state.quantized_data) + recurrent_fc += gate_params.compute_recurrent_bias(hidden_state.zero_point) + recurrent_fc = rescale( + recurrent_fc, gate_params.effective_recurrence_scale(hidden_state.scale), + 0, gate_params.cell_num_bits) + + before_activation = clip_range(input_fc + recurrent_fc, + gate_params.cell_num_bits) + + # Float Version + float_result = np.dot(gate_params.activation_weight.float_data, + input.float_data) + float_result += np.dot(gate_params.recurrent_weight.float_data, + hidden_state.float_data) + float_result += gate_params.bias_data_float + + if debug: + print(f'input fc: {input_fc.flatten()}') + print(f'recurrent fc: {recurrent_fc.flatten()}') + + dequantized_res = dequantize_data(before_activation, + gate_params.nonlinear_input_scale) + print(f'Intermediate before activation: {before_activation.flatten()}') + print(f'dequantized :{dequantized_res.flatten()} ') + print(f'float computation result: {float_result.flatten()} ') + + diff = dequantized_res - float_result + print(f'diff percentage (%): {abs(diff/float_result).flatten()*100}') + + if gate_params.modulation: + activated = quantized_tanh(before_activation, + gate_params.nonlinear_input_scale, + gate_params.nonlinear_output_scale, + gate_params.cell_num_bits) + float_result = np.tanh(float_result) + else: + activated = quantized_sigmoid(before_activation, + gate_params.nonlinear_input_scale, + gate_params.nonlinear_output_scale, + gate_params.cell_num_bits) + float_result = sigmoid(float_result) + + if debug: + dequantized_res = dequantize_data(activated, + gate_params.nonlinear_output_scale) + print(f'Gate result: {activated.flatten()} ') + print(f'Dequantized: {dequantized_res.flatten()} ') + print(f'float computation result: {float_result.flatten()} ') + diff = dequantized_res - float_result + print(f'diff percentage (%): {abs(diff/float_result).flatten()*100}') + + return activated, float_result + + +# The LSTM class +class QuantizedLSTMDebugger(object): + """Help the debugging process of the LSTM kernel implementation by + 1. Exposing the kernel internal computation + 2. 
Run floating point calculation in parallel with the integer version + """ + + def __init__( + self, + kernel_config, + kernel_params, + init_hidden_state_vals, + hiddens_state_range, + init_cell_state_vals, + cell_state_range, + cell_clip=8, + ): + self.kernel_config = kernel_config + self.forget_gate_params = create_gate_params( + kernel_params['forget_gate_data'], kernel_config) + self.input_gate_params = create_gate_params( + kernel_params['input_gate_data'], kernel_config) + self.modulation_gate_params = create_gate_params( + kernel_params['cell_gate_data'], kernel_config, modulation=True) + self.output_gate_params = create_gate_params( + kernel_params['output_gate_data'], kernel_config) + self.quantization_settings = kernel_config['quantization_settings'] + + self.hidden_state_tensor = assemble_quantized_tensor( + np.array(init_hidden_state_vals).reshape((-1, 1)), + hiddens_state_range[0], + hiddens_state_range[1], + False, + self.quantization_settings['activation_bits'], + ) + self.cell_state_tensor = assemble_quantized_tensor( + np.array(init_cell_state_vals).reshape((-1, 1)), + cell_state_range[0], + cell_state_range[1], + True, + self.quantization_settings['cell_bits'], + ) + + self.quantized_cell_clip = quantize_data( + cell_clip, + self.cell_state_tensor.scale, + self.cell_state_tensor.zero_point, + self.quantization_settings['cell_bits'], + ) + + def invoke(self, input_tensor, debug=False): + assert ( + input_tensor.num_bits == self.quantization_settings['activation_bits']) + + prev_hidden_state_tensor = deepcopy(self.hidden_state_tensor) + prev_cell_state_tensor = deepcopy(self.cell_state_tensor) + + prev_hidden_state_float = prev_hidden_state_tensor.float_data + prev_cell_state_float = prev_cell_state_tensor.float_data + + # forget gate + forget_gate_quant, forget_gate_float = gate_calculation( + input_tensor, prev_hidden_state_tensor, self.forget_gate_params) + + self.cell_state_tensor.quantized_data = rescale( + prev_cell_state_tensor.quantized_data * forget_gate_quant, + self.forget_gate_params.nonlinear_output_scale, + 0, + self.quantization_settings['cell_bits'], + ) + self.cell_state_tensor.float_data = (prev_cell_state_float * + forget_gate_float) + + # input gate + input_gate_quant, input_gate_float = gate_calculation( + input_tensor, prev_hidden_state_tensor, self.input_gate_params) + + modulation_gate_quant, modulation_gate_float = gate_calculation( + input_tensor, prev_hidden_state_tensor, self.modulation_gate_params) + + gated_input_quant = rescale( + input_gate_quant * modulation_gate_quant, + self._calculate_effective_cell_scale(), + 0, + self.quantization_settings['cell_bits'], + ) + gated_input_float = input_gate_float * modulation_gate_float + + if ( + debug + ): # Hidden/cell state will be updated, break up the debug to record the intermediate state + print('======================One Step LSTM======================') + print('###### Forget Gate Output: ######') + print(f'Quantized: {forget_gate_quant.flatten()}') + dequantized_val = dequantize_data( + forget_gate_quant, self.forget_gate_params.nonlinear_output_scale, 0) + print(f'Dequantized : {dequantized_val.flatten()}') + print(f'Float : {forget_gate_float.flatten()}') + + print('###### Cell state after forgetting: ######') + print(f'Quantized: {self.cell_state_tensor.quantized_data.flatten()}') + print( + f'Dequantized: {self.cell_state_tensor.dequantized_data.flatten()}') + print(f'Float : {self.cell_state_tensor.float_data.flatten()}') + + print('###### Input gate output: ######') + print(f'Quantized: 
{input_gate_quant.flatten()}') + dequantized_val = dequantize_data( + input_gate_quant, self.input_gate_params.nonlinear_output_scale, 0) + print(f'Dequantized: {dequantized_val.flatten()}') + print(f'Float : {input_gate_float.flatten()}') + + print('###### cell gate output: ######') + print(f'Quantized: {modulation_gate_quant.flatten()}') + dequantized_val = dequantize_data( + modulation_gate_quant, + self.modulation_gate_params.nonlinear_output_scale, + 0, + ) + print(f'Dequantized: {dequantized_val.flatten()}') + print(f'Float : {modulation_gate_float.flatten()}') + + print('###### Gated input (input_gate * cell_gate): ######') + print(f'Quantized: {gated_input_quant.flatten()}') + dequantized_val = dequantize_data(gated_input_quant, + self.cell_state_tensor.scale, 0) + print(f'Dequantized: {dequantized_val.flatten()}') + print(f'Float : {gated_input_float.flatten()}') + + # Update the cell state + self.cell_state_tensor.quantized_data += gated_input_quant + self._apply_cell_clip() + self.cell_state_tensor.float_data += gated_input_float + + # output gate + output_gate_quant, output_gate_float = gate_calculation( + input_tensor, prev_hidden_state_tensor, self.output_gate_params) + + # Update the hidden state + transformed_cell_quant = quantized_tanh( + self.cell_state_tensor.quantized_data, + self.output_gate_params.nonlinear_input_scale, + self.output_gate_params.nonlinear_output_scale, + self.cell_state_tensor.num_bits, + ) + + transformed_cell_float = np.tanh(self.cell_state_tensor.float_data) + + gated_output_quant = rescale( + output_gate_quant * transformed_cell_quant, + self._calculate_effective_output_scale(), + self.hidden_state_tensor.zero_point, + self.hidden_state_tensor.num_bits, + ) + gated_output_float = output_gate_float * transformed_cell_float + + self.hidden_state_tensor.quantized_data = gated_output_quant + self.hidden_state_tensor.float_data = gated_output_float + + if debug: + print('###### Updated cell state): ######') + print(f'Quantized: {self.cell_state_tensor.quantized_data.flatten()}') + print( + f'Dequantized: {self.cell_state_tensor.dequantized_data.flatten()}') + print(f'Float : {self.cell_state_tensor.float_data.flatten()}') + + print('###### Output gate: ######') + print(f'Quantized : {output_gate_quant.flatten()}') + dequantized_val = dequantize_data( + output_gate_quant, self.output_gate_params.nonlinear_output_scale, 0) + print(f'Dequantized: {dequantized_val.flatten()}') + print(f'Float : {output_gate_float.flatten()}') + + print('###### Tanh transformed cell: ######') + print(f'Quantized: {transformed_cell_quant.flatten()}') + dequantized_val = dequantize_data( + transformed_cell_quant, + self.output_gate_params.nonlinear_output_scale, + 0, + ) + print(f'Dequantized: {dequantized_val.flatten()}') + print(f'Float : {transformed_cell_float.flatten()}') + + print('###### Updated hidden state: ######') + print(f'Quantized: {gated_output_quant.flatten()}') + print( + f'Dequantized: {self.hidden_state_tensor.dequantized_data.flatten()}' + ) + print(f'Float : {gated_output_float.flatten()}') + + diff = abs(self.hidden_state_tensor.dequantized_data - + gated_output_float.flatten()) + max_diff_perc = diff / gated_output_float.flatten() * 100 + print(f'Max diff perc (%): {max_diff_perc}') + return gated_output_quant, gated_output_float + + def _calculate_effective_output_scale(self): + return (self.output_gate_params.nonlinear_output_scale * + self.modulation_gate_params.nonlinear_output_scale / + self.hidden_state_tensor.scale) + + def 
_calculate_effective_cell_scale(self): + return (self.input_gate_params.nonlinear_output_scale * + self.modulation_gate_params.nonlinear_output_scale / + self.cell_state_tensor.scale) + + def _apply_cell_clip(self): + cell_vals = self.cell_state_tensor.quantized_data + if (cell_vals.max() > self.quantized_cell_clip + or cell_vals.min() < -self.quantized_cell_clip): + print(f'WARNING: cell values clip to {self.quantized_cell_clip}!') + + self.cell_state_tensor.quantized_data = np.round( + np.clip(cell_vals, -self.quantized_cell_clip, + self.quantized_cell_clip)) diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/transpose.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/transpose.cc index daa75f173..00e907e52 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/transpose.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/transpose.cc @@ -116,7 +116,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace -TfLiteRegistration Register_TRANSPOSE() { +TfLiteRegistration_V1 Register_TRANSPOSE() { return tflite::micro::RegisterOp(nullptr, Prepare, Eval); } } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/transpose_conv.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/transpose_conv.cc index 9ea31454c..dc0ee1714 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/transpose_conv.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/transpose_conv.cc @@ -166,6 +166,12 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { micro_context->AllocateTempInputTensor(node, kFilterTensor); TF_LITE_ENSURE(context, filter != nullptr); + TF_LITE_ENSURE_MSG( + context, + input->type == filter->type || + (input->type == kTfLiteInt16 && filter->type == kTfLiteInt8), + "Hybrid models are not supported on TFLite Micro."); + // Get height and width of the output. const int width = SizeOfDimension(output, 2); const int height = SizeOfDimension(output, 1); @@ -253,11 +259,6 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const OpData& data = *(static_cast(node->user_data)); TF_LITE_ENSURE_EQ(context, input->type, output->type); - TF_LITE_ENSURE_MSG( - context, - input->type == filter->type || - (input->type == kTfLiteInt16 && filter->type == kTfLiteInt8), - "Hybrid models are not supported on TFLite Micro."); switch (input->type) { // Already know in/out types are same. case kTfLiteFloat32: { @@ -344,7 +345,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace -TfLiteRegistration Register_TRANSPOSE_CONV() { +TfLiteRegistration_V1 Register_TRANSPOSE_CONV() { return tflite::micro::RegisterOp(Init, Prepare, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.cc index f8b231349..ddbdae48c 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.cc @@ -1,4 +1,4 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,1375 +13,156 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include -#include +// Integer version of unidirectional sequence lstm. Only the standard LSTM +// (defined in the keras LSTM layer, e.g., no peephole etc.) is supported here. +// Currently used by the 16 bits activation case only + +#include +#include -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" #include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/fully_connected.h" #include "tensorflow/lite/micro/kernels/kernel_util.h" #include "tensorflow/lite/micro/kernels/lstm_eval.h" #include "tensorflow/lite/micro/kernels/lstm_shared.h" -#include "tensorflow/lite/micro/kernels/micro_tensor_utils.h" -#include "tensorflow/lite/micro/micro_log.h" namespace tflite { namespace { +/*Helper Functions*/ -constexpr int scratch_index_size = 12; - -struct UnidirectionalSequenceLstmOpData { - // If the lstm is layer norm. - bool use_layer_norm; - // The scratch index. - int scratch_index[scratch_index_size]; - - int32_t row_sums_size; - int32_t* row_sums; - bool compute_row_sums = false; - - int32_t input_zero_point; - int32_t output_state_zero_point; - - IntegerLstmParameter integer_lstm_param; -}; - -TfLiteStatus PopulateQuantizedLstmParams8x8_16( - TfLiteContext* context, TfLiteNode* node, - IntegerLstmParameter* integer_lstm_param) { - MicroContext* micro_context = GetMicroContext(context); - - // Calculate quantized clip for projection and cell. - const auto* params = - static_cast(node->builtin_data); - const float cell_clip = params->cell_clip; - const float proj_clip = params->proj_clip; - - TfLiteTensor* cell_state = - micro_context->AllocateTempInputTensor(node, kLstmCellStateTensor); - TF_LITE_ENSURE(context, cell_state != nullptr); - TF_LITE_ENSURE(context, cell_state->is_variable); - TfLiteTensor* output_tensor = - micro_context->AllocateTempOutputTensor(node, kLstmOutputTensor); - - TF_LITE_ENSURE(context, - cell_state->quantization.type != kTfLiteNoQuantization); - auto* cell_state_params = - static_cast(cell_state->quantization.params); - TF_LITE_ENSURE(context, - output_tensor->quantization.type != kTfLiteNoQuantization); - auto* proj_params = static_cast( - output_tensor->quantization.params); - if (cell_clip > 0.0f) { - integer_lstm_param->quantized_cell_clip = static_cast(std::min( - std::max(cell_clip / cell_state_params->scale->data[0], -32768.0f), - 32767.0f)); - } else { - integer_lstm_param->quantized_cell_clip = 0; - } - if (proj_clip > 0.0f) { - integer_lstm_param->quantized_proj_clip = static_cast(std::min( - std::max(proj_clip / proj_params->scale->data[0], -128.0f), 127.0f)); - } else { - integer_lstm_param->quantized_proj_clip = 0; - } - - // Calculate effective scales. 
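// A gate's "effective scale" folds the float scales of the two matmul operands
// together with the fixed-point scale of the gate's intermediate result, so the
// integer kernel only needs a single rescale per accumulator. A minimal sketch
// of that folding (hypothetical helper name; the individual scales are the ones
// gathered by this function):
double EffectiveScale(double input_scale, double weight_scale,
                      double intermediate_scale) {
  // input_q * weight_q carries scale input_scale * weight_scale; dividing by
  // the intermediate (e.g. Q3.12) scale re-expresses the product in that
  // fixed-point format before the nonlinearity is applied.
  return input_scale * weight_scale / intermediate_scale;
}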
- UnidirectionalSequenceLstmOpData* op_data = - static_cast(node->user_data); - const bool use_layer_norm = op_data->use_layer_norm; - - TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kLstmInputTensor); - - TfLiteTensor* input_to_input_weights = micro_context->AllocateTempInputTensor( - node, kLstmInputToInputWeightsTensor); - TfLiteTensor* input_to_forget_weights = - micro_context->AllocateTempInputTensor(node, - kLstmInputToForgetWeightsTensor); - TfLiteTensor* input_to_cell_weights = micro_context->AllocateTempInputTensor( - node, kLstmInputToCellWeightsTensor); - TfLiteTensor* input_to_output_weights = - micro_context->AllocateTempInputTensor(node, - kLstmInputToOutputWeightsTensor); - - TfLiteTensor* recurrent_to_input_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToInputWeightsTensor); - TfLiteTensor* recurrent_to_forget_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToForgetWeightsTensor); - TfLiteTensor* recurrent_to_cell_weights = - micro_context->AllocateTempInputTensor(node, - kLstmRecurrentToCellWeightsTensor); - TfLiteTensor* recurrent_to_output_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToOutputWeightsTensor); - - TfLiteTensor* cell_to_input_weights = micro_context->AllocateTempInputTensor( - node, kLstmCellToInputWeightsTensor); - TfLiteTensor* cell_to_forget_weights = micro_context->AllocateTempInputTensor( - node, kLstmCellToForgetWeightsTensor); - TfLiteTensor* cell_to_output_weights = micro_context->AllocateTempInputTensor( - node, kLstmCellToOutputWeightsTensor); - - TfLiteTensor* input_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmInputLayerNormCoefficientsTensor); - TfLiteTensor* forget_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmForgetLayerNormCoefficientsTensor); - TfLiteTensor* cell_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmCellLayerNormCoefficientsTensor); - TfLiteTensor* output_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmOutputLayerNormCoefficientsTensor); - - TfLiteTensor* projection_weights = micro_context->AllocateTempInputTensor( - node, kLstmProjectionWeightsTensor); - - TfLiteTensor* output_state = - micro_context->AllocateTempInputTensor(node, kLstmOutputStateTensor); - TF_LITE_ENSURE(context, output_state != nullptr); - TF_LITE_ENSURE(context, output_state->is_variable); - - // Since we have already checked that weights are all there or none, we can - // check the existence of only one to get the condition. - const bool use_cifg = (input_to_input_weights == nullptr); - const bool use_peephole = (cell_to_output_weights != nullptr); - const bool use_projection = (projection_weights != nullptr); - - // Get intermediate scales and zero points. - float intermediate_scale[5]; - int32_t intermediate_zp[5]; - for (int i = 0; i < 4; ++i) { - if (use_layer_norm) { - TfLiteTensor* intermediate = - micro_context->AllocateTempIntermediateTensor(node, i); - TF_LITE_ENSURE(context, - intermediate->quantization.type != kTfLiteNoQuantization); - auto* params_intermediate = static_cast( - intermediate->quantization.params); - intermediate_scale[i] = params_intermediate->scale->data[0]; - intermediate_zp[i] = params_intermediate->zero_point->data[0]; - if (intermediate != nullptr) { - micro_context->DeallocateTempTfLiteTensor(intermediate); - } - } else { - // Q3.12 for activation functions. 
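The Q3.12 fallback used just below (taken when there is no layer normalization) pins the intermediate scale to 2^-12 with zero point 0: an int16 raw value q stands for q * 2^-12, covering roughly [-8.0, 8.0). A tiny illustration of that mapping (values illustrative):

#include <cstdint>
#include <cstdio>

int main() {
  // Q3.12: 3 integer bits + 12 fractional bits -> scale 2^-12, zero point 0.
  const float scale = 0.000244140625f;  // 2^-12
  const int16_t q = 4096;               // raw fixed-point value
  std::printf("raw %d represents %f (range roughly [-8.0, 8.0))\n",
              static_cast<int>(q), static_cast<double>(q * scale));
  return 0;
}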
- intermediate_scale[i] = std::pow(2.0f, -12.0f); - intermediate_zp[i] = 0; - } - } - // In the absence of projection, hidden becomes otuput and this intermediate - // is ignored. - TfLiteTensor* hidden = micro_context->AllocateTempIntermediateTensor(node, 4); - TF_LITE_ENSURE(context, hidden->quantization.type != kTfLiteNoQuantization); - auto* hidden_params = - static_cast(hidden->quantization.params); - intermediate_scale[4] = hidden_params->scale->data[0]; - intermediate_zp[4] = hidden_params->zero_point->data[0]; - if (hidden != nullptr) { - micro_context->DeallocateTempTfLiteTensor(hidden); - } - - // Scales. - const float default_scale = 1.0; - float input_scale = default_scale; - float input_to_input_weight_scale = default_scale; - float recurrent_to_input_weight_scale = default_scale; - float cell_to_input_weight_scale = default_scale; - float input_to_forget_weight_scale = default_scale; - float recurrent_to_forget_weight_scale = default_scale; - float cell_to_forget_weight_scale = default_scale; - float input_to_cell_weight_scale = default_scale; - float recurrent_to_cell_weight_scale = default_scale; - float input_to_output_weight_scale = default_scale; - float recurrent_to_output_weight_scale = default_scale; - float cell_to_output_weight_scale = default_scale; - float projection_weight_scale = default_scale; - float layer_norm_input_scale = default_scale; - float layer_norm_forget_scale = default_scale; - float layer_norm_cell_scale = default_scale; - float layer_norm_output_scale = default_scale; - float output_state_scale = default_scale; - int cell_scale = 1; - - // Effective scales. - float effective_input_to_input_scale = default_scale; - float effective_recurrent_to_input_scale = default_scale; - float effective_cell_to_input_scale = default_scale; - float effective_input_to_forget_scale = default_scale; - float effective_recurrent_to_forget_scale = default_scale; - float effective_cell_to_forget_scale = default_scale; - float effective_input_to_cell_scale = default_scale; - float effective_recurrent_to_cell_scale = default_scale; - float effective_input_to_output_scale = default_scale; - float effective_recurrent_to_output_scale = default_scale; - float effective_cell_to_output_scale = default_scale; - float effective_proj_scale = default_scale; - float effective_hidden_scale = default_scale; - - // Populate scales. 
- if (!use_cifg) { - input_to_input_weight_scale = input_to_input_weights->params.scale; - recurrent_to_input_weight_scale = recurrent_to_input_weights->params.scale; - } - - if (use_peephole) { - if (!use_cifg) { - cell_to_input_weight_scale = cell_to_input_weights->params.scale; - } - cell_to_forget_weight_scale = cell_to_forget_weights->params.scale; - cell_to_output_weight_scale = cell_to_output_weights->params.scale; - } - - if (use_layer_norm) { - if (!use_cifg) { - layer_norm_input_scale = input_layer_norm_coefficients->params.scale; - } - layer_norm_forget_scale = forget_layer_norm_coefficients->params.scale; - layer_norm_cell_scale = cell_layer_norm_coefficients->params.scale; - layer_norm_output_scale = output_layer_norm_coefficients->params.scale; - } - - if (use_projection) { - projection_weight_scale = projection_weights->params.scale; - } - output_state_scale = output_state->params.scale; - - input_to_forget_weight_scale = input_to_forget_weights->params.scale; - input_to_cell_weight_scale = input_to_cell_weights->params.scale; - input_to_output_weight_scale = input_to_output_weights->params.scale; - recurrent_to_forget_weight_scale = recurrent_to_forget_weights->params.scale; - recurrent_to_cell_weight_scale = recurrent_to_cell_weights->params.scale; - recurrent_to_output_weight_scale = recurrent_to_output_weights->params.scale; - - // Check cell state (already used above) - TF_LITE_ENSURE(context, CheckedLog2(cell_state->params.scale, &cell_scale)); - // TF_LITE_ENSURE(context, cell_scale <= -9); - integer_lstm_param->cell_scale = cell_scale; - input_scale = input->params.scale; - - // Calculate effective scales. - if (!use_cifg) { - effective_input_to_input_scale = - input_to_input_weight_scale * input_scale / intermediate_scale[0]; - effective_recurrent_to_input_scale = recurrent_to_input_weight_scale * - output_state_scale / - intermediate_scale[0]; - } - effective_input_to_forget_scale = - input_to_forget_weight_scale * input_scale / intermediate_scale[1]; - effective_recurrent_to_forget_scale = recurrent_to_forget_weight_scale * - output_state_scale / - intermediate_scale[1]; - - effective_input_to_cell_scale = - input_to_cell_weight_scale * input_scale / intermediate_scale[2]; - effective_recurrent_to_cell_scale = recurrent_to_cell_weight_scale * - output_state_scale / - intermediate_scale[2]; - - effective_input_to_output_scale = - input_to_output_weight_scale * input_scale / intermediate_scale[3]; - effective_recurrent_to_output_scale = recurrent_to_output_weight_scale * - output_state_scale / - intermediate_scale[3]; - - effective_hidden_scale = - std::pow(2.0f, -15.0f) / intermediate_scale[4] * std::pow(2.0f, -15.0f); - - effective_proj_scale = - projection_weight_scale * intermediate_scale[4] / output_state_scale; - - if (use_peephole) { - if (!use_cifg) { - effective_cell_to_input_scale = - std::pow(2.0f, static_cast(cell_scale)) * - cell_to_input_weight_scale / intermediate_scale[0]; - } - effective_cell_to_forget_scale = - std::pow(2.0f, static_cast(cell_scale)) * - cell_to_forget_weight_scale / intermediate_scale[1]; - effective_cell_to_output_scale = - std::pow(2.0f, static_cast(cell_scale)) * - cell_to_output_weight_scale / intermediate_scale[3]; - } - - // Decompose scales. 
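The QuantizeMultiplier calls below split each floating-point effective scale into an int32 multiplier plus a power-of-two shift so the kernel can rescale with integer arithmetic only. A rough, self-contained sketch of that decomposition (a conceptual stand-in, not the library routine itself, which lives in tensorflow/lite/kernels/internal/quantization_util.h):

#include <cmath>
#include <cstdint>
#include <cstdio>

// Write m as (q / 2^31) * 2^shift, with q an int32 "Q31" multiplier and
// frac = m / 2^shift in [0.5, 1).
void DecomposeMultiplier(double m, int32_t* quantized, int* shift) {
  if (m == 0.0) {
    *quantized = 0;
    *shift = 0;
    return;
  }
  const double frac = std::frexp(m, shift);  // m == frac * 2^shift
  int64_t q = static_cast<int64_t>(std::llround(frac * (1LL << 31)));
  if (q == (1LL << 31)) {  // frac rounded up to 1.0; renormalize.
    q /= 2;
    ++*shift;
  }
  *quantized = static_cast<int32_t>(q);
}

int main() {
  int32_t q = 0;
  int shift = 0;
  DecomposeMultiplier(0.00123, &q, &shift);
  std::printf("0.00123 ~= (%d / 2^31) * 2^%d\n", static_cast<int>(q), shift);
  return 0;
}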
- int shift_output; - QuantizeMultiplier(static_cast(effective_input_to_input_scale), - &integer_lstm_param->effective_input_to_input_scale_a, - &shift_output); - integer_lstm_param->effective_input_to_input_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_recurrent_to_input_scale), - &integer_lstm_param->effective_recurrent_to_input_scale_a, - &shift_output); - integer_lstm_param->effective_recurrent_to_input_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_cell_to_input_scale), - &integer_lstm_param->effective_cell_to_input_scale_a, - &shift_output); - integer_lstm_param->effective_cell_to_input_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_input_to_forget_scale), - &integer_lstm_param->effective_input_to_forget_scale_a, - &shift_output); - integer_lstm_param->effective_input_to_forget_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_recurrent_to_forget_scale), - &integer_lstm_param->effective_recurrent_to_forget_scale_a, - &shift_output); - integer_lstm_param->effective_recurrent_to_forget_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_cell_to_forget_scale), - &integer_lstm_param->effective_cell_to_forget_scale_a, - &shift_output); - integer_lstm_param->effective_cell_to_forget_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_input_to_cell_scale), - &integer_lstm_param->effective_input_to_cell_scale_a, - &shift_output); - integer_lstm_param->effective_input_to_cell_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_recurrent_to_cell_scale), - &integer_lstm_param->effective_recurrent_to_cell_scale_a, - &shift_output); - integer_lstm_param->effective_recurrent_to_cell_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_input_to_output_scale), - &integer_lstm_param->effective_input_to_output_scale_a, - &shift_output); - integer_lstm_param->effective_input_to_output_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_recurrent_to_output_scale), - &integer_lstm_param->effective_recurrent_to_output_scale_a, - &shift_output); - integer_lstm_param->effective_recurrent_to_output_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_cell_to_output_scale), - &integer_lstm_param->effective_cell_to_output_scale_a, - &shift_output); - integer_lstm_param->effective_cell_to_output_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_proj_scale), - &integer_lstm_param->effective_proj_scale_a, - &shift_output); - integer_lstm_param->effective_proj_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_hidden_scale), - &integer_lstm_param->effective_hidden_scale_a, - &shift_output); - integer_lstm_param->effective_hidden_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(layer_norm_input_scale), - &integer_lstm_param->layer_norm_input_scale_a, - &shift_output); - integer_lstm_param->layer_norm_input_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(layer_norm_forget_scale), - &integer_lstm_param->layer_norm_forget_scale_a, - &shift_output); - integer_lstm_param->layer_norm_forget_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(layer_norm_cell_scale), - &integer_lstm_param->layer_norm_cell_scale_a, - &shift_output); - integer_lstm_param->layer_norm_cell_scale_b = - 
static_cast(shift_output); - QuantizeMultiplier(static_cast(layer_norm_output_scale), - &integer_lstm_param->layer_norm_output_scale_a, - &shift_output); - integer_lstm_param->layer_norm_output_scale_b = - static_cast(shift_output); - - integer_lstm_param->hidden_zp = intermediate_zp[4]; - - // 10000 is used to make sure the kernel logic does not overflow. - if (!use_cifg) { - integer_lstm_param->input_variance_guard = - std::max(1, static_cast(10000 * layer_norm_input_scale)); - } - integer_lstm_param->forget_variance_guard = - std::max(1, static_cast(10000 * layer_norm_forget_scale)); - integer_lstm_param->cell_variance_guard = - std::max(1, static_cast(10000 * layer_norm_cell_scale)); - integer_lstm_param->output_variance_guard = - std::max(1, static_cast(10000 * layer_norm_output_scale)); - - if (cell_state != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_state); - } - if (output_tensor != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output_tensor); - } - if (input != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input); - } - if (input_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_input_weights); - } - if (input_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_forget_weights); - } - if (input_to_cell_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_cell_weights); - } - if (input_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_output_weights); - } - if (recurrent_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_input_weights); - } - if (recurrent_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_forget_weights); - } - if (recurrent_to_cell_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_cell_weights); - } - if (recurrent_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_output_weights); - } - if (cell_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_to_input_weights); - } - if (cell_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_to_forget_weights); - } - if (cell_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_to_output_weights); - } - if (input_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_layer_norm_coefficients); - } - if (forget_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(forget_layer_norm_coefficients); - } - if (cell_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_layer_norm_coefficients); - } - if (output_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output_layer_norm_coefficients); - } - if (projection_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(projection_weights); - } - if (output_state != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output_state); - } - - return kTfLiteOk; -} - -// Temporary buffers used for hybrid mode -enum HybridTempBuffer { - kPrimaryScratchBuffer = 0, - kInputQuantized = 1, - kOutputStateQuantized = 2, - kCellStateQuantized = 3, - kInputScalingFactors = 4, - kOutputStateScalingFactors = 5, - kProductScalingFactors = 6, - kRecoveredCellWeights = 7, - kAccumScratch = 8, - kInputZeroPoints = 9, - kOutputStateZeroPoints = 10, - kScales = 11, - kNumHybridTempBuffers 
= 12, -}; +/*Kernel functions*/ void* UnidirectionalSequenceLstmInit(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - return context->AllocatePersistentBuffer( - context, sizeof(UnidirectionalSequenceLstmOpData)); -} - -// Check that input tensor dimensions matches with each other. -TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context, - TfLiteNode* node, int n_input, - int n_output, int n_cell, - bool use_layer_norm, bool is_integer) { - MicroContext* micro_context = GetMicroContext(context); - - const auto* params = reinterpret_cast(node->builtin_data); - - // Making sure clipping parameters have valid values. - // == 0 means no clipping - // > 0 means clipping - TF_LITE_ENSURE(context, params->cell_clip >= 0); - TF_LITE_ENSURE(context, params->proj_clip >= 0); - - TfLiteTensor* input_to_input_weights = micro_context->AllocateTempInputTensor( - node, kLstmInputToInputWeightsTensor); - if (input_to_input_weights != nullptr) { - TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->data[0], n_cell); - TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->data[1], n_input); - } - - TfLiteTensor* input_to_forget_weights = - micro_context->AllocateTempInputTensor(node, - kLstmInputToForgetWeightsTensor); - TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->data[0], n_cell); - TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->data[1], n_input); - - TfLiteTensor* input_to_cell_weights = micro_context->AllocateTempInputTensor( - node, kLstmInputToCellWeightsTensor); - TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->data[0], n_cell); - TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->data[1], n_input); - - TfLiteTensor* recurrent_to_input_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToInputWeightsTensor); - if (recurrent_to_input_weights != nullptr) { - TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->dims->data[0], - n_cell); - TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->dims->data[1], - n_output); - } - - TfLiteTensor* recurrent_to_forget_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToForgetWeightsTensor); - TF_LITE_ENSURE_EQ(context, recurrent_to_forget_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, recurrent_to_forget_weights->dims->data[0], - n_cell); - TF_LITE_ENSURE_EQ(context, recurrent_to_forget_weights->dims->data[1], - n_output); - - TfLiteTensor* recurrent_to_cell_weights = - micro_context->AllocateTempInputTensor(node, - kLstmRecurrentToCellWeightsTensor); - TF_LITE_ENSURE_EQ(context, recurrent_to_cell_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, recurrent_to_cell_weights->dims->data[0], n_cell); - TF_LITE_ENSURE_EQ(context, recurrent_to_cell_weights->dims->data[1], - n_output); - - // We make sure the input-gate's parameters are either both present (regular - // LSTM) or not at all (CIFG-LSTM). 
- const bool cifg_weights_all_or_none = - ((input_to_input_weights != nullptr) && - (recurrent_to_input_weights != nullptr)) || - ((input_to_input_weights == nullptr) && - (recurrent_to_input_weights == nullptr)); - TF_LITE_ENSURE(context, cifg_weights_all_or_none == true); - - TfLiteTensor* cell_to_input_weights = micro_context->AllocateTempInputTensor( - node, kLstmCellToInputWeightsTensor); - if (cell_to_input_weights != nullptr) { - TF_LITE_ENSURE_EQ(context, cell_to_input_weights->dims->size, 1); - TF_LITE_ENSURE_EQ(context, cell_to_input_weights->dims->data[0], n_cell); - TF_LITE_ENSURE_TYPES_EQ( - context, cell_to_input_weights->type, - is_integer ? kTfLiteInt16 : input_to_forget_weights->type); - } - - TfLiteTensor* cell_to_forget_weights = micro_context->AllocateTempInputTensor( - node, kLstmCellToForgetWeightsTensor); - if (cell_to_forget_weights != nullptr) { - TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->dims->size, 1); - TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->dims->data[0], n_cell); - TF_LITE_ENSURE_TYPES_EQ( - context, cell_to_forget_weights->type, - is_integer ? kTfLiteInt16 : input_to_forget_weights->type); - } - - TfLiteTensor* cell_to_output_weights = micro_context->AllocateTempInputTensor( - node, kLstmCellToOutputWeightsTensor); - if (cell_to_output_weights != nullptr) { - TF_LITE_ENSURE_EQ(context, cell_to_output_weights->dims->size, 1); - TF_LITE_ENSURE_EQ(context, cell_to_output_weights->dims->data[0], n_cell); - TF_LITE_ENSURE_TYPES_EQ( - context, cell_to_output_weights->type, - is_integer ? kTfLiteInt16 : input_to_forget_weights->type); - } - - // Making sure the peephole weights are there all or none. - const bool use_cifg = (input_to_input_weights == nullptr); - const bool peephole_weights_all_or_none = - ((cell_to_input_weights != nullptr || use_cifg) && - (cell_to_forget_weights != nullptr) && - (cell_to_output_weights != nullptr)) || - ((cell_to_input_weights == nullptr) && - (cell_to_forget_weights == nullptr) && - (cell_to_output_weights == nullptr)); - TF_LITE_ENSURE(context, peephole_weights_all_or_none == true); - - // Make sure the input gate bias is present only when not a CIFG-LSTM. 
- TfLiteTensor* input_gate_bias = - micro_context->AllocateTempInputTensor(node, kLstmInputGateBiasTensor); - if (use_cifg) { - TF_LITE_ENSURE_EQ(context, input_gate_bias, nullptr); - } else { - TF_LITE_ENSURE_EQ(context, input_gate_bias->dims->size, 1); - TF_LITE_ENSURE_EQ(context, input_gate_bias->dims->data[0], n_cell); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, input_gate_bias->type, kTfLiteInt32); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, input_gate_bias->type, kTfLiteFloat32); - } - } - - TfLiteTensor* forget_gate_bias = - micro_context->AllocateTempInputTensor(node, kLstmForgetGateBiasTensor); - TF_LITE_ENSURE_EQ(context, forget_gate_bias->dims->size, 1); - TF_LITE_ENSURE_EQ(context, forget_gate_bias->dims->data[0], n_cell); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, forget_gate_bias->type, kTfLiteInt32); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, forget_gate_bias->type, kTfLiteFloat32); - } - - TfLiteTensor* cell_gate_bias = - micro_context->AllocateTempInputTensor(node, kLstmCellGateBiasTensor); - TF_LITE_ENSURE_EQ(context, cell_gate_bias->dims->size, 1); - TF_LITE_ENSURE_EQ(context, cell_gate_bias->dims->data[0], n_cell); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, cell_gate_bias->type, kTfLiteInt32); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, cell_gate_bias->type, kTfLiteFloat32); - } - - TfLiteTensor* output_gate_bias = - micro_context->AllocateTempInputTensor(node, kLstmOutputGateBiasTensor); - TF_LITE_ENSURE_EQ(context, output_gate_bias->dims->size, 1); - TF_LITE_ENSURE_EQ(context, output_gate_bias->dims->data[0], n_cell); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, output_gate_bias->type, kTfLiteInt32); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, output_gate_bias->type, kTfLiteFloat32); - } - - TfLiteTensor* projection_weights = micro_context->AllocateTempInputTensor( - node, kLstmProjectionWeightsTensor); - if (projection_weights != nullptr) { - TF_LITE_ENSURE_EQ(context, projection_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, projection_weights->dims->data[0], n_output); - TF_LITE_ENSURE_EQ(context, projection_weights->dims->data[1], n_cell); - } - - TfLiteTensor* projection_bias = - micro_context->AllocateTempInputTensor(node, kLstmProjectionBiasTensor); - if (projection_bias != nullptr) { - TF_LITE_ENSURE_EQ(context, projection_bias->dims->size, 1); - TF_LITE_ENSURE_EQ(context, projection_bias->dims->data[0], n_output); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, projection_bias->type, kTfLiteInt32); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, projection_bias->type, kTfLiteFloat32); - } - } - - // Making sure the projection tensors are consistent: - // 1) If projection weight is not present, then projection bias should not be - // present. - // 2) If projection weight is present, then projection bias is optional. 
- const bool projecton_tensors_consistent = - ((projection_weights != nullptr) || (projection_bias == nullptr)); - TF_LITE_ENSURE(context, projecton_tensors_consistent == true); - - if (use_layer_norm) { - TfLiteTensor* input_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmInputLayerNormCoefficientsTensor); - if (use_cifg) { - TF_LITE_ENSURE_EQ(context, input_layer_norm_coefficients, nullptr); - } else { - TF_LITE_ENSURE(context, input_layer_norm_coefficients != nullptr); - TF_LITE_ENSURE_EQ(context, input_layer_norm_coefficients->dims->size, 1); - TF_LITE_ENSURE_EQ(context, input_layer_norm_coefficients->dims->data[0], - n_cell); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, input_layer_norm_coefficients->type, - kTfLiteInt16); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, input_layer_norm_coefficients->type, - kTfLiteFloat32); - } - } - - TfLiteTensor* forget_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmForgetLayerNormCoefficientsTensor); - TF_LITE_ENSURE_EQ(context, forget_layer_norm_coefficients->dims->size, 1); - TF_LITE_ENSURE_EQ(context, forget_layer_norm_coefficients->dims->data[0], - n_cell); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, forget_layer_norm_coefficients->type, - kTfLiteInt16); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, forget_layer_norm_coefficients->type, - kTfLiteFloat32); - } - - TfLiteTensor* cell_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmCellLayerNormCoefficientsTensor); - TF_LITE_ENSURE_EQ(context, cell_layer_norm_coefficients->dims->size, 1); - TF_LITE_ENSURE_EQ(context, cell_layer_norm_coefficients->dims->data[0], - n_cell); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, cell_layer_norm_coefficients->type, - kTfLiteInt16); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, cell_layer_norm_coefficients->type, - kTfLiteFloat32); - } - - TfLiteTensor* output_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmOutputLayerNormCoefficientsTensor); - TF_LITE_ENSURE_EQ(context, output_layer_norm_coefficients->dims->size, 1); - TF_LITE_ENSURE_EQ(context, output_layer_norm_coefficients->dims->data[0], - n_cell); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, output_layer_norm_coefficients->type, - kTfLiteInt16); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, output_layer_norm_coefficients->type, - kTfLiteFloat32); - } - if (input_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_layer_norm_coefficients); - } - if (forget_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(forget_layer_norm_coefficients); - } - if (cell_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_layer_norm_coefficients); - } - if (output_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output_layer_norm_coefficients); - } - } - - if (input_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_input_weights); - } - if (input_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_forget_weights); - } - if (input_to_cell_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_cell_weights); - } - if (recurrent_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_input_weights); - } - if (recurrent_to_forget_weights != nullptr) { - 
micro_context->DeallocateTempTfLiteTensor(recurrent_to_forget_weights); - } - micro_context->DeallocateTempTfLiteTensor(recurrent_to_cell_weights); - if (cell_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_to_input_weights); - } - if (cell_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_to_forget_weights); - } - if (cell_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_to_output_weights); - } - if (input_gate_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_gate_bias); - } - if (forget_gate_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(forget_gate_bias); - } - if (cell_gate_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_gate_bias); - } - if (output_gate_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output_gate_bias); - } - if (projection_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(projection_weights); - } - if (projection_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(projection_bias); - } - - return kTfLiteOk; -} - -TfLiteStatus PrecomputeZeroPointTimesWeightWithBias( - TfLiteContext* context, int32_t zero_point, - const TfLiteTensor* weight_tensor, const TfLiteTensor* bias_tensor, - int32_t** output) { - if (weight_tensor == nullptr) { - return kTfLiteOk; - } - - const RuntimeShape& weight_shape = GetTensorShape(weight_tensor); - TF_LITE_ENSURE_EQ(context, weight_shape.DimensionsCount(), 2); - const int row = weight_shape.Dims(0); - const int col = weight_shape.Dims(1); - TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - *output = static_cast( - context->AllocatePersistentBuffer(context, row * sizeof(int32_t))); - - if (bias_tensor == nullptr) { - memset(*output, 0, row * sizeof(int32_t)); - } else { - const int32_t* bias = GetTensorData(bias_tensor); - memcpy(*output, bias, row * sizeof(int32_t)); - } - if (zero_point != 0) { - const int8_t* weight = GetTensorData(weight_tensor); - tflite::tensor_utils::MatrixScalarMultiplyAccumulate(weight, zero_point, - row, col, *output); - } - return kTfLiteOk; + return context->AllocatePersistentBuffer(context, sizeof(OpDataLSTM)); } -TfLiteStatus PopulatePrecomputedZPTimesWeightsWithBias( - TfLiteContext* context, UnidirectionalSequenceLstmOpData* op_data, - TfLiteNode* node) { - MicroContext* micro_context = GetMicroContext(context); - - TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kLstmInputTensor); - TfLiteTensor* output_state = - micro_context->AllocateTempInputTensor(node, kLstmOutputStateTensor); - TF_LITE_ENSURE(context, output_state != nullptr); - TF_LITE_ENSURE(context, output_state->is_variable); - - const int32_t input_zero_point = -input->params.zero_point; - const int32_t output_state_zero_point = -output_state->params.zero_point; - - TfLiteTensor* input_to_input_weights = micro_context->AllocateTempInputTensor( - node, kLstmInputToInputWeightsTensor); - TfLiteTensor* input_to_forget_weights = - micro_context->AllocateTempInputTensor(node, - kLstmInputToForgetWeightsTensor); - TfLiteTensor* input_to_cell_weights = micro_context->AllocateTempInputTensor( - node, kLstmInputToCellWeightsTensor); - TfLiteTensor* input_to_output_weights = - micro_context->AllocateTempInputTensor(node, - kLstmInputToOutputWeightsTensor); - - TfLiteTensor* recurrent_to_input_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToInputWeightsTensor); - TfLiteTensor* recurrent_to_forget_weights = 
- micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToForgetWeightsTensor); - TfLiteTensor* recurrent_to_cell_weights = - micro_context->AllocateTempInputTensor(node, - kLstmRecurrentToCellWeightsTensor); - TfLiteTensor* recurrent_to_output_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToOutputWeightsTensor); - - TfLiteTensor* projection_weights = micro_context->AllocateTempInputTensor( - node, kLstmProjectionWeightsTensor); - TfLiteTensor* projection_bias = - micro_context->AllocateTempInputTensor(node, kLstmProjectionBiasTensor); - - IntegerLstmParameter* integer_lstm_params = &op_data->integer_lstm_param; - - TfLiteTensor* intermediate = - micro_context->AllocateTempIntermediateTensor(node, 4); - TF_LITE_ENSURE(context, - intermediate->quantization.type != kTfLiteNoQuantization); - const auto* params = - static_cast(intermediate->quantization.params); - const int32_t hidden_zp = params->zero_point->data[0]; - - // Get bias and perform zero point calculation. - // When there is layer normalization, the gate bias does not apply to matmul - // directly: - // y = ln(w * x + w * r + w * c) + b. - const bool is_layer_norm = op_data->use_layer_norm; - - // Forget gate. - TfLiteTensor* forget_gate_bias = is_layer_norm - ? nullptr - : micro_context->AllocateTempInputTensor( - node, kLstmForgetGateBiasTensor); - TF_LITE_ENSURE_OK( - context, - PrecomputeZeroPointTimesWeightWithBias( - context, input_zero_point, input_to_forget_weights, forget_gate_bias, - &(integer_lstm_params->input_to_forget_effective_bias))); - - TF_LITE_ENSURE_OK( - context, - PrecomputeZeroPointTimesWeightWithBias( - context, output_state_zero_point, recurrent_to_forget_weights, - nullptr, &(integer_lstm_params->recurrent_to_forget_effective_bias))); - - // Modulation gate. - TfLiteTensor* cell_gate_bias = is_layer_norm - ? nullptr - : micro_context->AllocateTempInputTensor( - node, kLstmCellGateBiasTensor); - TF_LITE_ENSURE_OK( - context, - PrecomputeZeroPointTimesWeightWithBias( - context, input_zero_point, input_to_cell_weights, cell_gate_bias, - &(integer_lstm_params->input_to_cell_effective_bias))); - TF_LITE_ENSURE_OK( - context, - PrecomputeZeroPointTimesWeightWithBias( - context, output_state_zero_point, recurrent_to_cell_weights, nullptr, - &(integer_lstm_params->recurrent_to_cell_effective_bias))); - - // Output gate. - TfLiteTensor* output_gate_bias = is_layer_norm - ? nullptr - : micro_context->AllocateTempInputTensor( - node, kLstmOutputGateBiasTensor); - TF_LITE_ENSURE_OK( - context, - PrecomputeZeroPointTimesWeightWithBias( - context, input_zero_point, input_to_output_weights, output_gate_bias, - &(integer_lstm_params->input_to_output_effective_bias))); - - TF_LITE_ENSURE_OK( - context, - PrecomputeZeroPointTimesWeightWithBias( - context, output_state_zero_point, recurrent_to_output_weights, - nullptr, &(integer_lstm_params->recurrent_to_output_effective_bias))); - - // Input gate. The calculation is only meaningful for non-cifg case. - TfLiteTensor* input_gate_bias = is_layer_norm - ? 
nullptr - : micro_context->AllocateTempInputTensor( - node, kLstmInputGateBiasTensor); - TF_LITE_ENSURE_OK( - context, - PrecomputeZeroPointTimesWeightWithBias( - context, input_zero_point, input_to_input_weights, input_gate_bias, - &(integer_lstm_params->input_to_input_effective_bias))); - TF_LITE_ENSURE_OK( - context, - PrecomputeZeroPointTimesWeightWithBias( - context, output_state_zero_point, recurrent_to_input_weights, nullptr, - &(integer_lstm_params->recurrent_to_input_effective_bias))); - - // Projection bias. The calculation is only meaningful for with projection. - TF_LITE_ENSURE_OK(context, - PrecomputeZeroPointTimesWeightWithBias( - context, hidden_zp, projection_weights, projection_bias, - &(integer_lstm_params->projection_effective_bias))); - - if (input != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input); - } - if (output_state != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output_state); - } - if (input_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_input_weights); - } - if (input_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_forget_weights); - } - if (input_to_cell_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_cell_weights); - } - if (input_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_output_weights); - } - if (recurrent_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_input_weights); - } - if (recurrent_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_forget_weights); - } - if (recurrent_to_cell_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_cell_weights); - } - if (recurrent_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_output_weights); - } - if (projection_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(projection_weights); - } - if (projection_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(projection_bias); - } - if (forget_gate_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(forget_gate_bias); - } - if (cell_gate_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_gate_bias); - } - if (output_gate_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output_gate_bias); - } - if (input_gate_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_gate_bias); - } - - if (intermediate != nullptr) { - micro_context->DeallocateTempTfLiteTensor(intermediate); - } - - return kTfLiteOk; -} - -// Resize the output and state tensors based on the sizes of the input tensors. -// Allocate a temporary scratch tensor. Also check that the sizes of the input -// tensors match each other. TfLiteStatus UnidirectionalSequenceLstmPrepare(TfLiteContext* context, TfLiteNode* node) { - UnidirectionalSequenceLstmOpData* op_data = - reinterpret_cast(node->user_data); - - MicroContext* micro_context = GetMicroContext(context); - - // Check we have all the inputs and outputs we need. 
- bool use_layer_norm = false; - if (node->inputs->size == 24) { - TfLiteTensor* forget_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmForgetLayerNormCoefficientsTensor); - if (forget_layer_norm_coefficients == nullptr) { - use_layer_norm = false; - } else { - use_layer_norm = true; - } - if (forget_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(forget_layer_norm_coefficients); - } - } else if (node->inputs->size == 20) { - // This is deprecated and is only kept here for backward compatibility. - use_layer_norm = false; - } else { - MicroPrintf("The LSTM Full kernel expects 20 or 24 inputs. Got %d inputs", - node->inputs->size); - return kTfLiteError; - } TF_LITE_ENSURE_EQ(context, node->outputs->size, 1); - op_data->use_layer_norm = use_layer_norm; - - // Inferring batch size, number of outputs and sequence length and - // number of cells from the input tensors. - TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kLstmInputTensor); - op_data->input_zero_point = input->params.zero_point; - const bool is_integer = input->type == kTfLiteInt8; - TF_LITE_ENSURE(context, input->dims->size > 1); - const auto* params = - reinterpret_cast( - node->builtin_data); - const bool time_major = params->time_major; - const int n_batch = time_major ? input->dims->data[1] : input->dims->data[0]; - const int n_input = input->dims->data[2]; + TF_LITE_ENSURE_EQ(context, node->inputs->size, 24); - TfLiteTensor* input_to_output_weights = - micro_context->AllocateTempInputTensor(node, - kLstmInputToOutputWeightsTensor); - const int n_cell = input_to_output_weights->dims->data[0]; - TF_LITE_ENSURE_EQ(context, input_to_output_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, input_to_output_weights->dims->data[1], n_input); + TFLITE_DCHECK(node->builtin_data != nullptr); + TFLITE_DCHECK(node->user_data != nullptr); - TfLiteTensor* recurrent_to_output_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToOutputWeightsTensor); - TF_LITE_ENSURE_EQ(context, recurrent_to_output_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, recurrent_to_output_weights->dims->data[0], - n_cell); - const int n_output = recurrent_to_output_weights->dims->data[1]; - - // Check that input tensor dimensions matches with each other. + OpDataLSTM* op_data = reinterpret_cast(node->user_data); + const auto* builtin_data = + static_cast(node->builtin_data); + // All TempTfLiteTensors will be deallocated through the destructor. + LstmTensors lstm_tensors(context, node); + TF_LITE_ENSURE_OK(context, lstm_tensors.ValidateTensorStatus(context)); + + op_data->cell_gate_nonlinear_type = builtin_data->activation; + op_data->size_info = + CreateLstmSizeInfo(builtin_data->time_major, + lstm_tensors.GetInternalTensor(kLstmInputTensor)->dims, + lstm_tensors.HiddenStateTensor()->dims); TF_LITE_ENSURE_OK( - context, CheckInputTensorDimensions(context, node, n_input, n_output, - n_cell, use_layer_norm, is_integer)); - - // Get the pointer to output, output_state and cell_state buffer tensors. 
- TfLiteTensor* output = - micro_context->AllocateTempOutputTensor(node, kLstmOutputTensor); - - TfLiteTensor* output_state = - micro_context->AllocateTempInputTensor(node, kLstmOutputStateTensor); - TF_LITE_ENSURE(context, output_state != nullptr); - TF_LITE_ENSURE(context, output_state->is_variable); - op_data->output_state_zero_point = output_state->params.zero_point; - TfLiteTensor* cell_state = - micro_context->AllocateTempInputTensor(node, kLstmCellStateTensor); - TF_LITE_ENSURE(context, cell_state != nullptr); - TF_LITE_ENSURE(context, cell_state->is_variable); - - // Check the shape of input state tensors. - // These tensor may be 1D or 2D. It's fine as long as the total size is - // correct. - TF_LITE_ENSURE_EQ(context, NumElements(output_state), n_batch * n_output); - TF_LITE_ENSURE_EQ(context, NumElements(cell_state), n_batch * n_cell); - - // Check the shape of output tensor against that of input tensor - TF_LITE_ENSURE_EQ(context, output->dims->size, 3); - TF_LITE_ENSURE_EQ(context, input->dims->data[0], output->dims->data[0]); - TF_LITE_ENSURE_EQ(context, input->dims->data[1], output->dims->data[1]); - TF_LITE_ENSURE_EQ(context, output->dims->data[2], n_output); - - if (is_integer) { - const int num_intermediate_tensors = node->intermediates->size; - TF_LITE_ENSURE(context, num_intermediate_tensors == 5); - } - - TfLiteTensor* input_to_input_weights = micro_context->AllocateTempInputTensor( - node, kLstmInputToInputWeightsTensor); - - const bool use_cifg = (input_to_input_weights == nullptr); - - // Create a primary scratch buffer for hybrid and float - // If is_integer, primary scratch buffer has a different size - if (!is_integer) { - int scratch_buffer_size[2]; - scratch_buffer_size[0] = n_batch; - - if (use_cifg) { - // Reserving space for Cell, Forget, Output gates - scratch_buffer_size[1] = n_cell * 3; - } else { - // Reserving space for Input, Cell, Forget, Output gates - scratch_buffer_size[1] = n_cell * 4; - } - - TF_LITE_ENSURE_OK(context, - context->RequestScratchBufferInArena( - context, - scratch_buffer_size[0] * scratch_buffer_size[1] * - TfLiteTypeGetSize(input->type), - &(op_data->scratch_index[kPrimaryScratchBuffer]))); - } - - if (is_integer) { - // Integer UnidirectionalSequenceLSTM prepare function for 8x8->16. - // This code path needs 5 intermediate tensors per Op. - // Populate quantization parameters. - PopulateQuantizedLstmParams8x8_16(context, node, - &op_data->integer_lstm_param); - // Allocate scratch buffer. Need 4 16-bit buffer with size n_batch * n_cell - // and 1 8-bit buffer with size n_batch * n_cell. For integer - // UnidirectionalSequenceLSTM, we do not need the extra 32-bit buffer. - for (int i = 0; i < 5; ++i) { - TfLiteType buffer_type = kTfLiteInt16; - - if (i == 4) { - buffer_type = kTfLiteInt8; - } - - TF_LITE_ENSURE_OK( - context, - context->RequestScratchBufferInArena( - context, n_batch * n_cell * TfLiteTypeGetSize(buffer_type), - &(op_data->scratch_index[i]))); - } - - // Populate precomputed zp * weight. 
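The precomputation below folds the input and output-state zero points into per-row "effective" biases: with real_x = s_x * (q_x - zp), the term -zp * sum_c(w[r][c]) does not depend on the input data, so it can be added to the bias once at Prepare time (the kernel passes the negated zero point so the same accumulate helper can be reused). A minimal sketch with hypothetical shapes and names:

#include <cstdint>
#include <vector>

// effective_bias[r] = bias[r] - zero_point * row_sum(weights[r]), because
//   sum_c w[r][c] * (q_x[c] - zp) = sum_c w[r][c] * q_x[c] - zp * sum_c w[r][c].
std::vector<int32_t> PrecomputeEffectiveBias(const std::vector<int8_t>& weights,
                                             int rows, int cols,
                                             int32_t zero_point,
                                             const std::vector<int32_t>& bias) {
  std::vector<int32_t> effective(rows, 0);
  for (int r = 0; r < rows; ++r) {
    int32_t row_sum = 0;
    for (int c = 0; c < cols; ++c) {
      row_sum += weights[r * cols + c];
    }
    effective[r] = (bias.empty() ? 0 : bias[r]) - zero_point * row_sum;
  }
  return effective;
}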
- TF_LITE_ENSURE_OK(context, PopulatePrecomputedZPTimesWeightsWithBias( - context, op_data, node)); - } - - if (input != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input); - } - if (input_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_output_weights); - } - if (recurrent_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_output_weights); - } - if (output != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output); - } - if (output_state != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output_state); - } - if (cell_state != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_state); + context, ValidateTensorSize(context, lstm_tensors, op_data->size_info)); + + // Create cell state information and gate parameters (Fully Connected and Mul) + auto cell_state_type = + lstm_tensors.GetInternalTensor(kLstmCellStateTensor)->type; + if (cell_state_type == kTfLiteFloat32) { + op_data->cell_state_info = + CreateLstmCellStateInfoFloat(builtin_data->cell_clip); + TF_LITE_ENSURE_OK( + context, PrepareGateParametersFloat(context, lstm_tensors, op_data)); + } else if (cell_state_type == kTfLiteInt16) { + op_data->cell_state_info = CreateLstmCellStateInfo( + lstm_tensors.CellStateTensor()->params.scale, builtin_data->cell_clip); + TF_LITE_ENSURE_OK( + context, PrepareGateParametersInteger(context, lstm_tensors, op_data)); + } else { + MicroPrintf( + "Cell state type %s (%d) not supported. The quantized Unidirectional " + "Sequence LSTM Op only support int16 cell state", + TfLiteTypeGetName(cell_state_type), cell_state_type); + return kTfLiteError; } - - if (input_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_input_weights); + // request buffers (four buffers) + for (size_t i = 0; i < 4; i++) { + TF_LITE_ENSURE_OK(context, context->RequestScratchBufferInArena( + context, + op_data->size_info.batch_size * + op_data->size_info.state_dimension * + TfLiteTypeGetSize(cell_state_type), + &(op_data->buffer_indices[i]))); } return kTfLiteOk; } TfLiteStatus UnidirectionalSequenceLstmEval(TfLiteContext* context, TfLiteNode* node) { - TFLITE_DCHECK(context->GetScratchBuffer != nullptr); - - const auto* params = - reinterpret_cast( - node->builtin_data); - const UnidirectionalSequenceLstmOpData* op_data = - reinterpret_cast(node->user_data); - const bool use_layer_norm = op_data->use_layer_norm; - const bool time_major = params->time_major; - - const TfLiteEvalTensor* input = - tflite::micro::GetEvalInput(context, node, kLstmInputTensor); - - const TfLiteEvalTensor* input_to_input_weights = tflite::micro::GetEvalInput( - context, node, kLstmInputToInputWeightsTensor); - - const TfLiteEvalTensor* input_to_forget_weights = tflite::micro::GetEvalInput( - context, node, kLstmInputToForgetWeightsTensor); - - const TfLiteEvalTensor* input_to_cell_weights = - tflite::micro::GetEvalInput(context, node, kLstmInputToCellWeightsTensor); - - const TfLiteEvalTensor* input_to_output_weights = tflite::micro::GetEvalInput( - context, node, kLstmInputToOutputWeightsTensor); - - const TfLiteEvalTensor* recurrent_to_input_weights = - tflite::micro::GetEvalInput(context, node, - kLstmRecurrentToInputWeightsTensor); - - const TfLiteEvalTensor* recurrent_to_forget_weights = - tflite::micro::GetEvalInput(context, node, - kLstmRecurrentToForgetWeightsTensor); - - const TfLiteEvalTensor* recurrent_to_cell_weights = - tflite::micro::GetEvalInput(context, node, - kLstmRecurrentToCellWeightsTensor); 
- - const TfLiteEvalTensor* recurrent_to_output_weights = - tflite::micro::GetEvalInput(context, node, - kLstmRecurrentToOutputWeightsTensor); - - const TfLiteEvalTensor* cell_to_input_weights = - tflite::micro::GetEvalInput(context, node, kLstmCellToInputWeightsTensor); - - const TfLiteEvalTensor* cell_to_forget_weights = tflite::micro::GetEvalInput( - context, node, kLstmCellToForgetWeightsTensor); - - const TfLiteEvalTensor* cell_to_output_weights = tflite::micro::GetEvalInput( - context, node, kLstmCellToOutputWeightsTensor); - - const TfLiteEvalTensor* input_gate_bias = - tflite::micro::GetEvalInput(context, node, kLstmInputGateBiasTensor); + TFLITE_DCHECK(node->user_data != nullptr); + const OpDataLSTM& op_data = *reinterpret_cast(node->user_data); + auto kernel_content = CreateLSTMKernelContent(context, node); - const TfLiteEvalTensor* forget_gate_bias = - tflite::micro::GetEvalInput(context, node, kLstmForgetGateBiasTensor); + const auto activation_type = + kernel_content.internal_tensors[kLstmInputTensor]->type; + const auto weight_type = + kernel_content.internal_tensors[kLstmInputToInputWeightsTensor]->type; - const TfLiteEvalTensor* cell_gate_bias = - tflite::micro::GetEvalInput(context, node, kLstmCellGateBiasTensor); - - const TfLiteEvalTensor* output_gate_bias = - tflite::micro::GetEvalInput(context, node, kLstmOutputGateBiasTensor); - - const TfLiteEvalTensor* projection_weights = - tflite::micro::GetEvalInput(context, node, kLstmProjectionWeightsTensor); - - const TfLiteEvalTensor* projection_bias = - tflite::micro::GetEvalInput(context, node, kLstmProjectionBiasTensor); - - TfLiteEvalTensor* output_state = - tflite::micro::GetMutableEvalInput(context, node, kLstmOutputStateTensor); - - TfLiteEvalTensor* cell_state = - tflite::micro::GetMutableEvalInput(context, node, kLstmCellStateTensor); - - TFLITE_DCHECK(cell_state != nullptr); - - const TfLiteEvalTensor* input_layer_norm_coefficients = - use_layer_norm ? tflite::micro::GetEvalInput( - context, node, kLstmInputLayerNormCoefficientsTensor) - : nullptr; - const TfLiteEvalTensor* forget_layer_norm_coefficients = - use_layer_norm - ? tflite::micro::GetEvalInput(context, node, - kLstmForgetLayerNormCoefficientsTensor) - : nullptr; - const TfLiteEvalTensor* cell_layer_norm_coefficients = - use_layer_norm ? tflite::micro::GetEvalInput( - context, node, kLstmCellLayerNormCoefficientsTensor) - : nullptr; - const TfLiteEvalTensor* output_layer_norm_coefficients = - use_layer_norm - ? tflite::micro::GetEvalInput(context, node, - kLstmOutputLayerNormCoefficientsTensor) - : nullptr; - - TfLiteEvalTensor* output = - tflite::micro::GetEvalOutput(context, node, kLstmOutputTensor); - - // Copy out the LSTM specific params so they can be passed in the function. - TfLiteLSTMParams lstm_params; - lstm_params.activation = params->activation; - lstm_params.cell_clip = params->cell_clip; - lstm_params.proj_clip = params->proj_clip; - lstm_params.asymmetric_quantize_inputs = params->asymmetric_quantize_inputs; - - switch (input_to_output_weights->type) { + switch (activation_type) { case kTfLiteFloat32: { - // Index the scratch buffers pointers to the global scratch buffer. 
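The scratch handling here follows the usual TFLite Micro pattern: Prepare() reserves arena space through RequestScratchBufferInArena and keeps only the returned index, and Eval() resolves that index back into a pointer with GetScratchBuffer on every invocation. A condensed sketch with hypothetical names (ExampleOpData, ExampleRequestScratch, ExampleGetScratch):

#include <cstdint>

#include "tensorflow/lite/c/common.h"

// Hypothetical op data holding the index handed back by the arena.
struct ExampleOpData {
  int scratch_index;
};

// Prepare-time: reserve n_batch * n_cell int16 elements; only the index is
// stored, not a pointer, because the buffer is placed later.
TfLiteStatus ExampleRequestScratch(TfLiteContext* context, TfLiteNode* node,
                                   int n_batch, int n_cell) {
  auto* data = static_cast<ExampleOpData*>(node->user_data);
  return context->RequestScratchBufferInArena(
      context, n_batch * n_cell * sizeof(int16_t), &data->scratch_index);
}

// Eval-time: resolve the stored index back into a pointer for this invocation.
int16_t* ExampleGetScratch(TfLiteContext* context, TfLiteNode* node) {
  auto* data = static_cast<ExampleOpData*>(node->user_data);
  return static_cast<int16_t*>(
      context->GetScratchBuffer(context, data->scratch_index));
}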
- return EvalFloatLstm( - input, input_to_input_weights, input_to_forget_weights, - input_to_cell_weights, input_to_output_weights, - recurrent_to_input_weights, recurrent_to_forget_weights, - recurrent_to_cell_weights, recurrent_to_output_weights, - cell_to_input_weights, cell_to_forget_weights, cell_to_output_weights, - input_layer_norm_coefficients, forget_layer_norm_coefficients, - cell_layer_norm_coefficients, output_layer_norm_coefficients, - /*aux_input=*/nullptr, - /*aux_input_to_input_weights=*/nullptr, - /*aux_input_to_forget_weights=*/nullptr, - /*aux_input_to_cell_weights=*/nullptr, - /*aux_input_to_output_weights=*/nullptr, input_gate_bias, - forget_gate_bias, cell_gate_bias, output_gate_bias, - projection_weights, projection_bias, &lstm_params, - /*forward_sequence=*/true, time_major, - /*output_offset=*/0, - reinterpret_cast(context->GetScratchBuffer( - context, op_data->scratch_index[kPrimaryScratchBuffer])), - output_state, cell_state, output); - } break; - case kTfLiteUInt8: + LSTMBuffers buffers = + CreateLSTMBuffers(context, op_data.buffer_indices); + EvalLstm(op_data, kernel_content, buffers); + break; + } case kTfLiteInt8: { - return EvalInteger8x8_16Lstm( - input, input_to_input_weights, input_to_forget_weights, - input_to_cell_weights, input_to_output_weights, - recurrent_to_input_weights, recurrent_to_forget_weights, - recurrent_to_cell_weights, recurrent_to_output_weights, - cell_to_input_weights, cell_to_forget_weights, cell_to_output_weights, - input_layer_norm_coefficients, forget_layer_norm_coefficients, - cell_layer_norm_coefficients, output_layer_norm_coefficients, - input_gate_bias, forget_gate_bias, cell_gate_bias, output_gate_bias, - projection_weights, projection_bias, &lstm_params, - /*forward_sequence=*/true, time_major, &op_data->integer_lstm_param, - op_data->output_state_zero_point, output_state, cell_state, output, - reinterpret_cast( - context->GetScratchBuffer(context, op_data->scratch_index[0])), - reinterpret_cast( - context->GetScratchBuffer(context, op_data->scratch_index[1])), - reinterpret_cast( - context->GetScratchBuffer(context, op_data->scratch_index[2])), - reinterpret_cast( - context->GetScratchBuffer(context, op_data->scratch_index[3])), - reinterpret_cast( - context->GetScratchBuffer(context, op_data->scratch_index[4])), - nullptr); - } break; - default: - MicroPrintf("Type %s is not currently supported.", - TfLiteTypeGetName(input_to_output_weights->type)); + switch (weight_type) { + case kTfLiteInt8: { + // 8(activation)x8(weight)->16(cell) LSTM with 32 bits bias + LSTMBuffers buffers = + CreateLSTMBuffers(context, op_data.buffer_indices); + EvalLstm(op_data, kernel_content, + buffers); + break; + } + default: { + MicroPrintf("Filter type %s (%d) not supported.", + TfLiteTypeGetName(weight_type), activation_type); + return kTfLiteError; + } + } + break; + } + case kTfLiteInt16: { + switch (weight_type) { + case kTfLiteInt8: { + // 16(activation)x8(weight)->16(cell) LSTM with 64 bits bias + LSTMBuffers buffers = + CreateLSTMBuffers(context, op_data.buffer_indices); + EvalLstm(op_data, kernel_content, + buffers); + break; + } + default: { + MicroPrintf("Filter type %s (%d) not supported.", + TfLiteTypeGetName(weight_type), weight_type); + return kTfLiteError; + } + } + break; + } + default: { + MicroPrintf("Input type %s (%d) not supported.", + TfLiteTypeGetName(activation_type), activation_type); return kTfLiteError; + } } + return kTfLiteOk; } } // namespace -TfLiteRegistration Register_UNIDIRECTIONAL_SEQUENCE_LSTM() { 
+TfLiteRegistration_V1 Register_UNIDIRECTIONAL_SEQUENCE_LSTM() { return tflite::micro::RegisterOp(UnidirectionalSequenceLstmInit, UnidirectionalSequenceLstmPrepare, UnidirectionalSequenceLstmEval); } - } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.h new file mode 100644 index 000000000..9b334594d --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.h @@ -0,0 +1,47 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_UNIDIRECTIONAL_SEQUENCE_LSTM_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_UNIDIRECTIONAL_SEQUENCE_LSTM_H_ + +#include + +#include "tensorflow/lite/c/builtin_op_data.h" +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +// This is the most generic TfLiteRegistration_V1. The actual supported types +// may still be target dependent. The only requirement is that every +// implementation (reference or optimized) must define this function. +// TODO(b/230666079): resolve conflict with xtensa implementation +TfLiteRegistration_V1 Register_UNIDIRECTIONAL_SEQUENCE_LSTM(); + +#if defined(CMSIS_NN) +// Returns a TfLiteRegistration_V1 struct for kernel variant that only supports +// int8 activations and int8 weights and uses the latency optimized +// implementations. +TfLiteRegistration_V1 Register_UNIDIRECTIONAL_SEQUENCE_LSTM_INT8(); + +#else +inline TfLiteRegistration_V1 Register_UNIDIRECTIONAL_SEQUENCE_LSTM_INT8() { + return Register_UNIDIRECTIONAL_SEQUENCE_LSTM(); +} +#endif + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_UNIDIRECTIONAL_SEQUENCE_LSTM_H_ diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/unpack.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/unpack.cc index b58df2e73..d6fcf62c8 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/unpack.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/unpack.cc @@ -1,4 +1,4 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -21,9 +21,7 @@ limitations under the License. 
#include "tensorflow/lite/micro/micro_log.h" namespace tflite { -namespace ops { -namespace micro { -namespace unpack { + namespace { constexpr int kInputTensor = 0; @@ -100,13 +98,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } + } // namespace -} // namespace unpack -TfLiteRegistration Register_UNPACK() { - return tflite::micro::RegisterOp(nullptr, nullptr, unpack::Eval); +TfLiteRegistration_V1 Register_UNPACK() { + return tflite::micro::RegisterOp(nullptr, nullptr, Eval); } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/var_handle.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/var_handle.cc index cbd2485ca..5ddf90f22 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/var_handle.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/var_handle.cc @@ -86,7 +86,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace. -TfLiteRegistration Register_VAR_HANDLE() { +TfLiteRegistration_V1 Register_VAR_HANDLE() { return tflite::micro::RegisterOp(Init, Prepare, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/while.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/while.cc index 811c9eaec..65c5ac8ac 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/while.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/while.cc @@ -126,7 +126,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace. -TfLiteRegistration Register_WHILE() { +TfLiteRegistration_V1 Register_WHILE() { return tflite::micro::RegisterOp(Init, Prepare, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/zeros_like.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/zeros_like.cc index bb0c3147c..5c702abdd 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/zeros_like.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/zeros_like.cc @@ -81,7 +81,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } } // namespace -TfLiteRegistration Register_ZEROS_LIKE() { +TfLiteRegistration_V1 Register_ZEROS_LIKE() { return tflite::micro::RegisterOp(nullptr, Prepare, Eval); } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/micro_allocation_info.cc b/third_party/tflite-micro/tensorflow/lite/micro/micro_allocation_info.cc index 96484330e..a89a5e6c2 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/micro_allocation_info.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_allocation_info.cc @@ -1,4 +1,4 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -179,6 +179,7 @@ TfLiteStatus AllocationInfoBuilder::InitializeAllocationInfo( const int32_t* offline_offsets, SubgraphAllocations* allocations) { AllocationInfo* allocation_info = info_.allocation_info; // Initialize allocation info for every tensor in every subgraph. 
+ int offline_index = 0; for (size_t subgraph_idx = 0; subgraph_idx < model_->subgraphs()->size(); subgraph_idx++) { const SubGraph* subgraph = model_->subgraphs()->Get(subgraph_idx); @@ -203,7 +204,14 @@ TfLiteStatus AllocationInfoBuilder::InitializeAllocationInfo( (!subgraph->tensors()->Get(i)->is_variable()) && (current->bytes != 0); if (offline_offsets) { - current->offline_offset = offline_offsets[i]; + current->offline_offset = offline_offsets[offline_index++]; + + // Mark offline planned variable tensors so they can get an offline + // offset and be handled offline. + if (subgraph->tensors()->Get(i)->is_variable() && + current->offline_offset != kOnlinePlannedBuffer) { + current->needs_allocating = true; + } } else { current->offline_offset = kOnlinePlannedBuffer; } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/micro_allocator.cc b/third_party/tflite-micro/tensorflow/lite/micro/micro_allocator.cc index 3853df307..5fa1c0cc6 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/micro_allocator.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_allocator.cc @@ -1,4 +1,4 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -490,15 +490,6 @@ TfLiteStatus MicroAllocator::FinishModelAllocation( TF_LITE_ENSURE_STATUS(AllocateScratchBufferHandles( scratch_buffer_handles, scratch_buffer_request_count_)); - // Allocate buffers for variable tensors. - for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size(); - subgraph_idx++) { - const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx); - TFLITE_DCHECK(subgraph != nullptr); - TF_LITE_ENSURE_STATUS(AllocateVariables( - subgraph, subgraph_allocations[subgraph_idx].tensors)); - } - // Plan all subgraphs and scratch buffers together. 
TF_LITE_ENSURE_STATUS(CommitStaticMemoryPlan(model, subgraph_allocations, *scratch_buffer_handles)); @@ -712,6 +703,14 @@ TfLiteTensor* MicroAllocator::AllocateTempTfLiteTensor( return tensor; } +uint8_t* MicroAllocator::AllocateTempBuffer(size_t size, size_t alignment) { + return non_persistent_buffer_allocator_->AllocateTemp(size, alignment); +} + +void MicroAllocator::DeallocateTempBuffer(uint8_t* buffer) { + non_persistent_buffer_allocator_->DeallocateTemp(buffer); +} + TfLiteStatus MicroAllocator::ResetTempAllocations() { return non_persistent_buffer_allocator_->ResetTempAllocations(); } @@ -754,23 +753,27 @@ TfLiteStatus MicroAllocator::AllocateTfLiteEvalTensors( return kTfLiteOk; } -TfLiteStatus MicroAllocator::AllocateVariables(const SubGraph* subgraph, - TfLiteEvalTensor* eval_tensors) { +TfLiteStatus MicroAllocator::AllocateVariables( + const SubGraph* subgraph, TfLiteEvalTensor* eval_tensors, + const int32_t* offline_planner_offsets) { for (size_t i = 0; i < subgraph->tensors()->size(); ++i) { auto* tensor = subgraph->tensors()->Get(i); if (tensor->is_variable()) { - size_t buffer_size; - TF_LITE_ENSURE_STATUS( - TfLiteEvalTensorByteLength(&eval_tensors[i], &buffer_size)); + if (offline_planner_offsets == nullptr || + offline_planner_offsets[i] == kOnlinePlannedBuffer) { + size_t buffer_size; + TF_LITE_ENSURE_STATUS( + TfLiteEvalTensorByteLength(&eval_tensors[i], &buffer_size)); - eval_tensors[i].data.data = - persistent_buffer_allocator_->AllocatePersistentBuffer( - buffer_size, MicroArenaBufferAlignment()); + eval_tensors[i].data.data = + persistent_buffer_allocator_->AllocatePersistentBuffer( + buffer_size, MicroArenaBufferAlignment()); - if (eval_tensors[i].data.data == nullptr) { - MicroPrintf("Failed to allocate variable tensor of size %d", - buffer_size); - return kTfLiteError; + if (eval_tensors[i].data.data == nullptr) { + MicroPrintf("Failed to allocate variable tensor of size %d", + buffer_size); + return kTfLiteError; + } } } } @@ -819,6 +822,17 @@ TfLiteStatus MicroAllocator::CommitStaticMemoryPlan( const int32_t* offline_planner_offsets = nullptr; TF_LITE_ENSURE_STATUS( builder.GetOfflinePlannedOffsets(&offline_planner_offsets)); + + // We allocate buffers for variable tensors here since the offline planner + // offsets are conviently available here. + for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size(); + subgraph_idx++) { + const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx); + TFLITE_DCHECK(subgraph != nullptr); + TF_LITE_ENSURE_STATUS(AllocateVariables( + subgraph, allocations[subgraph_idx].tensors, offline_planner_offsets)); + } + TF_LITE_ENSURE_STATUS( builder.InitializeAllocationInfo(offline_planner_offsets, allocations)); diff --git a/third_party/tflite-micro/tensorflow/lite/micro/micro_allocator.h b/third_party/tflite-micro/tensorflow/lite/micro/micro_allocator.h index 5cd0e1893..05dbf892d 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/micro_allocator.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_allocator.h @@ -1,4 +1,4 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -68,7 +68,7 @@ struct ScratchBufferRequest { struct NodeAndRegistration { TfLiteNode node; - const TfLiteRegistration* registration; + const TfLiteRegistration_V1* registration; }; // Holds a pointer to a buffer for a scratch buffer requested by a kernel during @@ -195,6 +195,13 @@ class MicroAllocator { virtual void DeallocateTempTfLiteTensor(TfLiteTensor*); + // Returns a pointer to a buffer from the temporary arena memory and is only + // guaranteed until a call is made to ResetTempAllocations(). + virtual uint8_t* AllocateTempBuffer(size_t size, size_t alignment); + + // Signals that the temporary buffer no longer needed. + virtual void DeallocateTempBuffer(uint8_t* buffer); + // Resets all temporary allocations. This method should be called after a // chain of temp allocations (e.g. chain of TfLiteTensor objects via // AllocateTfLiteTensor()). @@ -247,9 +254,13 @@ class MicroAllocator { // for all tensor buffers. virtual TfLiteStatus AllocateTfLiteEvalTensors( const Model* model, SubgraphAllocations* subgraph_allocations); + // Allocates persistent tensor buffers for variable tensors in the subgraph. - virtual TfLiteStatus AllocateVariables(const SubGraph* subgraph, - TfLiteEvalTensor* eval_tensors); + // Online and offline variable tensors are handled differently hence the + // offline_planner_offsets parameter is needed. + virtual TfLiteStatus AllocateVariables( + const SubGraph* subgraph, TfLiteEvalTensor* eval_tensors, + const int32_t* offline_planner_offsets); // Allocate and return a persistent TfLiteTensor. // TODO(b/162311891): Drop this method when the interpreter has an API for diff --git a/third_party/tflite-micro/tensorflow/lite/micro/micro_context.cc b/third_party/tflite-micro/tensorflow/lite/micro/micro_context.cc index bb78fe706..b06252acb 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/micro_context.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_context.cc @@ -19,26 +19,34 @@ limitations under the License. 
#include #include +#include "tensorflow/lite/kernels/internal/compatibility.h" #include "tensorflow/lite/micro/micro_log.h" namespace tflite { MicroContext::MicroContext(MicroAllocator* allocator, const Model* model, MicroGraph* graph) - : allocator_(*allocator), graph_(*graph), model_(model) {} + : allocator_(*allocator), + graph_(*graph), + model_(model), + state_(InterpreterState::kInit) {} MicroContext::~MicroContext() {} void* MicroContext::AllocatePersistentBuffer(size_t bytes) { + TFLITE_DCHECK(state_ == InterpreterState::kPrepare || + state_ == InterpreterState::kInit); return allocator_.AllocatePersistentBuffer(bytes); } TfLiteStatus MicroContext::RequestScratchBufferInArena(size_t bytes, int* buffer_idx) { + TFLITE_DCHECK(state_ == InterpreterState::kPrepare); return allocator_.RequestScratchBufferInArena( bytes, graph_.GetCurrentSubgraphIndex(), buffer_idx); } void* MicroContext::GetScratchBuffer(int buffer_idx) { + TFLITE_DCHECK(state_ == InterpreterState::kInvoke); ScratchBufferHandle* handle = scratch_buffer_handles_ + buffer_idx; return handle->data; } @@ -94,6 +102,16 @@ void MicroContext::DeallocateTempTfLiteTensor(TfLiteTensor* tensor) { return allocator_.DeallocateTempTfLiteTensor(tensor); } +uint8_t* MicroContext::AllocateTempBuffer(size_t size, size_t alignment) { + TFLITE_DCHECK(state_ == InterpreterState::kPrepare); + return allocator_.AllocateTempBuffer(size, alignment); +} + +void MicroContext::DeallocateTempBuffer(uint8_t* buffer) { + TFLITE_DCHECK(state_ == InterpreterState::kPrepare); + allocator_.DeallocateTempBuffer(buffer); +} + TfLiteEvalTensor* MicroContext::GetEvalTensor(int tensor_idx) { return &graph_.GetAllocations()[graph_.GetCurrentSubgraphIndex()] .tensors[tensor_idx]; @@ -106,6 +124,8 @@ void MicroContext::SetScratchBufferHandles( TfLiteStatus MicroContext::set_external_context( void* external_context_payload) { + TFLITE_DCHECK(state_ == InterpreterState::kPrepare || + state_ == InterpreterState::kInvoke); if (external_context_payload == nullptr || external_context_payload_ != nullptr) { MicroPrintf( @@ -126,4 +146,12 @@ void MicroContextReportOpError(struct TfLiteContext* context, va_end(args); } +void MicroContext::SetInterpreterState(MicroContext::InterpreterState state) { + state_ = state; +} + +MicroContext::InterpreterState MicroContext::GetInterpreterState() const { + return state_; +} + } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/micro_context.h b/third_party/tflite-micro/tensorflow/lite/micro/micro_context.h index e7be65444..63b4b7d59 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/micro_context.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_context.h @@ -29,6 +29,15 @@ namespace tflite { // micro_context-> class MicroContext { public: + // Enum that allows MicroContext to keep track of the stages different memory + // planning APIs are available to kernels. + enum class InterpreterState { + kInit, + kPrepare, + kMemoryPlanning, + kInvoke, + }; + // Does not take any ownership, and all pointers must refer to valid objects // that outlive the one constructed. explicit MicroContext(MicroAllocator* allocator, const Model* model, @@ -84,10 +93,26 @@ class MicroContext { // Virtual so that it can be faked for kernel tests. virtual void DeallocateTempTfLiteTensor(TfLiteTensor* tensor); + // Returns a pointer to a temporary buffer (from the arena). + // This API is only valid from the kernel's Prepare function and + // the buffer's lifetime is also that of the Prepare function. 
+ // Virtual so that it can be faked for kernel tests. + virtual uint8_t* AllocateTempBuffer(size_t size, size_t alignment); + + // Signals that the temporary buffer is no longer needed. + // Virtual so that it can be faked for kernel tests. + virtual void DeallocateTempBuffer(uint8_t* buffer); + // Returns a TfLiteEvalTensor struct for a given index. // Virtual so that it can be faked for kernel tests. virtual TfLiteEvalTensor* GetEvalTensor(int tensor_idx); + // Sets the State of MemoryPlanning MicroContext + void SetInterpreterState(MicroContext::InterpreterState state); + + // Sets the State of MemoryPlanning MicroContext + MicroContext::InterpreterState GetInterpreterState() const; + // Does not take ownership of the pointer and the pointer must refer to valid // an object that outlive this class instance. // This can only be called once to set one external context. @@ -110,6 +135,7 @@ class MicroContext { MicroAllocator& allocator_; MicroGraph& graph_; const Model* model_; + InterpreterState state_; ScratchBufferHandle* scratch_buffer_handles_ = nullptr; void* external_context_payload_ = nullptr; diff --git a/third_party/tflite-micro/tensorflow/lite/micro/micro_graph.cc b/third_party/tflite-micro/tensorflow/lite/micro/micro_graph.cc index 98a9ff3c4..577796190 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/micro_graph.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_graph.cc @@ -27,7 +27,7 @@ limitations under the License. namespace tflite { namespace { -const char* OpNameFromRegistration(const TfLiteRegistration* registration) { +const char* OpNameFromRegistration(const TfLiteRegistration_V1* registration) { if (registration->builtin_code == BuiltinOperator_CUSTOM) { return registration->custom_name; } else { @@ -62,7 +62,7 @@ TfLiteStatus MicroGraph::InitSubgraphs() { for (size_t i = 0; i < operators_size; ++i) { TfLiteNode* node = &(subgraph_allocations_[subgraph_idx].node_and_registrations[i].node); - const TfLiteRegistration* registration = + const TfLiteRegistration_V1* registration = subgraph_allocations_[subgraph_idx] .node_and_registrations[i] .registration; @@ -96,7 +96,7 @@ TfLiteStatus MicroGraph::PrepareSubgraphs() { for (size_t i = 0; i < operators_size; ++i) { TfLiteNode* node = &(subgraph_allocations_[subgraph_idx].node_and_registrations[i].node); - const TfLiteRegistration* registration = + const TfLiteRegistration_V1* registration = subgraph_allocations_[subgraph_idx] .node_and_registrations[i] .registration; @@ -126,7 +126,7 @@ TfLiteStatus MicroGraph::FreeSubgraphs() { for (size_t i = 0; i < operators_size; ++i) { TfLiteNode* node = &(subgraph_allocations_[subgraph_idx].node_and_registrations[i].node); - const TfLiteRegistration* registration = + const TfLiteRegistration_V1* registration = subgraph_allocations_[subgraph_idx] .node_and_registrations[i] .registration; @@ -155,9 +155,10 @@ TfLiteStatus MicroGraph::InvokeSubgraph(int subgraph_idx) { for (size_t i = 0; i < operators_size; ++i) { TfLiteNode* node = &(subgraph_allocations_[subgraph_idx].node_and_registrations[i].node); - const TfLiteRegistration* registration = subgraph_allocations_[subgraph_idx] - .node_and_registrations[i] - .registration; + const TfLiteRegistration_V1* registration = + subgraph_allocations_[subgraph_idx] + .node_and_registrations[i] + .registration; // This ifdef is needed (even though ScopedMicroProfiler itself is a no-op with // -DTF_LITE_STRIP_ERROR_STRINGS) because the function OpNameFromRegistration is diff --git 
a/third_party/tflite-micro/tensorflow/lite/micro/micro_graph.h b/third_party/tflite-micro/tensorflow/lite/micro/micro_graph.h
index 942082aca..ce93d3396 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/micro_graph.h
+++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_graph.h
@@ -38,20 +38,20 @@ class MicroGraph {
              MicroResourceVariables* resource_variables);
   virtual ~MicroGraph();
 
-  // Sets up builtin data and calls TfLiteRegistration->Init for every operator
-  // in every subgraph in the model.
+  // Sets up builtin data and calls TfLiteRegistration_V1->Init for every
+  // operator in every subgraph in the model.
   virtual TfLiteStatus InitSubgraphs();
 
-  // Calls TfLiteRegistration->Prepare for every operator in every subgraph in
-  // the model.
+  // Calls TfLiteRegistration_V1->Prepare for every operator in every subgraph
+  // in the model.
   virtual TfLiteStatus PrepareSubgraphs();
 
-  // Calls TfLiteRegistration->Free for every operator in every subgraph in the
-  // model.
+  // Calls TfLiteRegistration_V1->Free for every operator in every subgraph in
+  // the model.
   virtual TfLiteStatus FreeSubgraphs();
 
-  // Calls TfLiteRegistration->Invoke for every operator in a single subgraph in
-  // the model.
+  // Calls TfLiteRegistration_V1->Invoke for every operator in a single subgraph
+  // in the model.
   virtual TfLiteStatus InvokeSubgraph(int subgraph_idx);
 
   // Zeros out all variable tensors in all subgraphs in the model.
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/micro_interpreter.cc b/third_party/tflite-micro/tensorflow/lite/micro/micro_interpreter.cc
index 8bbfbb453..515cb4e0f 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/micro_interpreter.cc
+++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_interpreter.cc
@@ -24,11 +24,11 @@ limitations under the License.
 #include "tensorflow/lite/micro/flatbuffer_utils.h"
 #include "tensorflow/lite/micro/memory_helpers.h"
 #include "tensorflow/lite/micro/micro_allocator.h"
+#include "tensorflow/lite/micro/micro_context.h"
 #include "tensorflow/lite/micro/micro_log.h"
 #include "tensorflow/lite/micro/micro_op_resolver.h"
 #include "tensorflow/lite/micro/micro_profiler_interface.h"
 #include "tensorflow/lite/micro/tflite_bridge/flatbuffer_conversions_bridge.h"
-#include "tensorflow/lite/micro/tflite_bridge/op_resolver_bridge.h"
 #include "tensorflow/lite/schema/schema_generated.h"
 #include "tensorflow/lite/schema/schema_utils.h"
@@ -77,11 +77,17 @@ MicroInterpreter::~MicroInterpreter() {
 }
 
 void MicroInterpreter::Init(MicroProfilerInterface* profiler) {
+  micro_context_.SetInterpreterState(MicroContext::InterpreterState::kInit);
   context_.impl_ = static_cast<void*>(&micro_context_);
   context_.ReportError = MicroContextReportOpError;
   context_.GetTensor = MicroContextGetTensor;
   context_.GetEvalTensor = MicroContextGetEvalTensor;
   context_.profiler = profiler;
+  context_.RequestScratchBufferInArena =
+      MicroContextRequestScratchBufferInArena;
+  context_.GetExternalContext = MicroContextGetExternalContext;
+  context_.AllocatePersistentBuffer = MicroContextAllocatePersistentBuffer;
+  context_.GetScratchBuffer = MicroContextGetScratchBuffer;
 
   initialization_status_ = kTfLiteOk;
 }
@@ -192,27 +198,15 @@ TfLiteStatus MicroInterpreter::AllocateTensors() {
 
   TF_LITE_ENSURE_STATUS(PrepareNodeAndRegistrationDataFromFlatbuffer());
 
-  // Only allow AllocatePersistentBuffer in Init stage.
- context_.AllocatePersistentBuffer = MicroContextAllocatePersistentBuffer; - context_.RequestScratchBufferInArena = nullptr; - context_.GetScratchBuffer = nullptr; - context_.GetExternalContext = nullptr; + micro_context_.SetInterpreterState(MicroContext::InterpreterState::kInit); TF_LITE_ENSURE_STATUS(graph_.InitSubgraphs()); - // Both AllocatePersistentBuffer and RequestScratchBufferInArena is - // available in Prepare stage. - context_.RequestScratchBufferInArena = - MicroContextRequestScratchBufferInArena; - // external_context become available in Prepare stage. - context_.GetExternalContext = MicroContextGetExternalContext; + micro_context_.SetInterpreterState(MicroContext::InterpreterState::kPrepare); TF_LITE_ENSURE_STATUS(graph_.PrepareSubgraphs()); - // Prepare is done, we're ready for Invoke. Memory allocation is no longer - // allowed. Kernels can only fetch scratch buffers via GetScratchBuffer. - context_.AllocatePersistentBuffer = nullptr; - context_.RequestScratchBufferInArena = nullptr; - context_.GetScratchBuffer = MicroContextGetScratchBuffer; + micro_context_.SetInterpreterState( + MicroContext::InterpreterState::kMemoryPlanning); TF_LITE_ENSURE_OK(&context_, allocator_.FinishModelAllocation( model_, graph_.GetAllocations(), @@ -267,6 +261,7 @@ TfLiteStatus MicroInterpreter::AllocateTensors() { TF_LITE_ENSURE_STATUS(Reset()); tensors_allocated_ = true; + micro_context_.SetInterpreterState(MicroContext::InterpreterState::kInvoke); return kTfLiteOk; } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/micro_mutable_op_resolver.h b/third_party/tflite-micro/tensorflow/lite/micro/micro_mutable_op_resolver.h index a4d50c83a..c9a2c8fdb 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/micro_mutable_op_resolver.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_mutable_op_resolver.h @@ -1,4 +1,4 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -37,7 +37,7 @@ limitations under the License. 
#include "tensorflow/lite/schema/schema_generated.h" namespace tflite { -TfLiteRegistration* Register_DETECTION_POSTPROCESS(); +TfLiteRegistration_V1* Register_DETECTION_POSTPROCESS(); template class MicroMutableOpResolver : public MicroOpResolver { @@ -46,11 +46,12 @@ class MicroMutableOpResolver : public MicroOpResolver { explicit MicroMutableOpResolver() {} - const TfLiteRegistration* FindOp(tflite::BuiltinOperator op) const override { + const TfLiteRegistration_V1* FindOp( + tflite::BuiltinOperator op) const override { if (op == BuiltinOperator_CUSTOM) return nullptr; for (unsigned int i = 0; i < registrations_len_; ++i) { - const TfLiteRegistration& registration = registrations_[i]; + const TfLiteRegistration_V1& registration = registrations_[i]; if (registration.builtin_code == op) { return ®istration; } @@ -58,9 +59,9 @@ class MicroMutableOpResolver : public MicroOpResolver { return nullptr; } - const TfLiteRegistration* FindOp(const char* op) const override { + const TfLiteRegistration_V1* FindOp(const char* op) const override { for (unsigned int i = 0; i < registrations_len_; ++i) { - const TfLiteRegistration& registration = registrations_[i]; + const TfLiteRegistration_V1& registration = registrations_[i]; if ((registration.builtin_code == BuiltinOperator_CUSTOM) && (strcmp(registration.custom_name, op) == 0)) { return ®istration; @@ -84,7 +85,8 @@ class MicroMutableOpResolver : public MicroOpResolver { // function is called again for a previously added Custom Operator, the // MicroOpResolver will be unchanged and this function will return // kTfLiteError. - TfLiteStatus AddCustom(const char* name, TfLiteRegistration* registration) { + TfLiteStatus AddCustom(const char* name, + TfLiteRegistration_V1* registration) { if (registrations_len_ >= tOpCount) { MicroPrintf( "Couldn't register custom op '%s', resolver size is too" @@ -99,7 +101,8 @@ class MicroMutableOpResolver : public MicroOpResolver { return kTfLiteError; } - TfLiteRegistration* new_registration = ®istrations_[registrations_len_]; + TfLiteRegistration_V1* new_registration = + ®istrations_[registrations_len_]; registrations_len_ += 1; *new_registration = *registration; @@ -112,11 +115,11 @@ class MicroMutableOpResolver : public MicroOpResolver { // MicroMutableOpResolver object. 
TfLiteStatus AddAbs() { - return AddBuiltin(BuiltinOperator_ABS, tflite::ops::micro::Register_ABS(), - ParseAbs); + return AddBuiltin(BuiltinOperator_ABS, Register_ABS(), ParseAbs); } - TfLiteStatus AddAdd(const TfLiteRegistration& registration = Register_ADD()) { + TfLiteStatus AddAdd( + const TfLiteRegistration_V1& registration = Register_ADD()) { return AddBuiltin(BuiltinOperator_ADD, registration, ParseAdd); } @@ -139,7 +142,7 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddAveragePool2D( - const TfLiteRegistration& registration = Register_AVERAGE_POOL_2D()) { + const TfLiteRegistration_V1& registration = Register_AVERAGE_POOL_2D()) { return AddBuiltin(BuiltinOperator_AVERAGE_POOL_2D, registration, ParsePool); } @@ -168,8 +171,7 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddCeil() { - return AddBuiltin(BuiltinOperator_CEIL, tflite::ops::micro::Register_CEIL(), - ParseCeil); + return AddBuiltin(BuiltinOperator_CEIL, Register_CEIL(), ParseCeil); } TfLiteStatus AddCircularBuffer() { @@ -177,19 +179,17 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddConcatenation() { - return AddBuiltin(BuiltinOperator_CONCATENATION, - tflite::ops::micro::Register_CONCATENATION(), + return AddBuiltin(BuiltinOperator_CONCATENATION, Register_CONCATENATION(), ParseConcatenation); } TfLiteStatus AddConv2D( - const TfLiteRegistration& registration = Register_CONV_2D()) { + const TfLiteRegistration_V1& registration = Register_CONV_2D()) { return AddBuiltin(BuiltinOperator_CONV_2D, registration, ParseConv2D); } TfLiteStatus AddCos() { - return AddBuiltin(BuiltinOperator_COS, tflite::ops::micro::Register_COS(), - ParseCos); + return AddBuiltin(BuiltinOperator_COS, tflite::Register_COS(), ParseCos); } TfLiteStatus AddCumSum() { @@ -202,8 +202,8 @@ class MicroMutableOpResolver : public MicroOpResolver { tflite::Register_DEPTH_TO_SPACE(), ParseDepthToSpace); } - TfLiteStatus AddDepthwiseConv2D( - const TfLiteRegistration& registration = Register_DEPTHWISE_CONV_2D()) { + TfLiteStatus AddDepthwiseConv2D(const TfLiteRegistration_V1& registration = + Register_DEPTHWISE_CONV_2D()) { return AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D, registration, ParseDepthwiseConv2D); } @@ -227,12 +227,11 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddEqual() { - return AddBuiltin(BuiltinOperator_EQUAL, - tflite::ops::micro::Register_EQUAL(), ParseEqual); + return AddBuiltin(BuiltinOperator_EQUAL, Register_EQUAL(), ParseEqual); } TfLiteStatus AddEthosU() { - TfLiteRegistration* registration = tflite::Register_ETHOSU(); + TfLiteRegistration_V1* registration = tflite::Register_ETHOSU(); if (registration) { return AddCustom(tflite::GetString_ETHOSU(), registration); } @@ -253,8 +252,7 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddFloor() { - return AddBuiltin(BuiltinOperator_FLOOR, - tflite::ops::micro::Register_FLOOR(), ParseFloor); + return AddBuiltin(BuiltinOperator_FLOOR, Register_FLOOR(), ParseFloor); } TfLiteStatus AddFloorDiv() { @@ -268,7 +266,7 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddFullyConnected( - const TfLiteRegistration& registration = Register_FULLY_CONNECTED()) { + const TfLiteRegistration_V1& registration = Register_FULLY_CONNECTED()) { return AddBuiltin(BuiltinOperator_FULLY_CONNECTED, registration, ParseFullyConnected); } @@ -284,13 +282,12 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddGreater() { - 
return AddBuiltin(BuiltinOperator_GREATER, - tflite::ops::micro::Register_GREATER(), ParseGreater); + return AddBuiltin(BuiltinOperator_GREATER, Register_GREATER(), + ParseGreater); } TfLiteStatus AddGreaterEqual() { - return AddBuiltin(BuiltinOperator_GREATER_EQUAL, - tflite::ops::micro::Register_GREATER_EQUAL(), + return AddBuiltin(BuiltinOperator_GREATER_EQUAL, Register_GREATER_EQUAL(), ParseGreaterEqual); } @@ -305,8 +302,7 @@ class MicroMutableOpResolver : public MicroOpResolver { TfLiteStatus AddL2Normalization() { return AddBuiltin(BuiltinOperator_L2_NORMALIZATION, - tflite::ops::micro::Register_L2_NORMALIZATION(), - ParseL2Normalization); + Register_L2_NORMALIZATION(), ParseL2Normalization); } TfLiteStatus AddL2Pool2D() { @@ -320,19 +316,16 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddLess() { - return AddBuiltin(BuiltinOperator_LESS, tflite::ops::micro::Register_LESS(), - ParseLess); + return AddBuiltin(BuiltinOperator_LESS, Register_LESS(), ParseLess); } TfLiteStatus AddLessEqual() { - return AddBuiltin(BuiltinOperator_LESS_EQUAL, - tflite::ops::micro::Register_LESS_EQUAL(), + return AddBuiltin(BuiltinOperator_LESS_EQUAL, Register_LESS_EQUAL(), ParseLessEqual); } TfLiteStatus AddLog() { - return AddBuiltin(BuiltinOperator_LOG, tflite::ops::micro::Register_LOG(), - ParseLog); + return AddBuiltin(BuiltinOperator_LOG, Register_LOG(), ParseLog); } TfLiteStatus AddLogicalAnd() { @@ -341,8 +334,7 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddLogicalNot() { - return AddBuiltin(BuiltinOperator_LOGICAL_NOT, - tflite::ops::micro::Register_LOGICAL_NOT(), + return AddBuiltin(BuiltinOperator_LOGICAL_NOT, Register_LOGICAL_NOT(), ParseLogicalNot); } @@ -362,12 +354,12 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddMaximum() { - return AddBuiltin(BuiltinOperator_MAXIMUM, - tflite::ops::micro::Register_MAXIMUM(), ParseMaximum); + return AddBuiltin(BuiltinOperator_MAXIMUM, Register_MAXIMUM(), + ParseMaximum); } TfLiteStatus AddMaxPool2D( - const TfLiteRegistration& registration = Register_MAX_POOL_2D()) { + const TfLiteRegistration_V1& registration = Register_MAX_POOL_2D()) { return AddBuiltin(BuiltinOperator_MAX_POOL_2D, registration, ParsePool); } @@ -381,11 +373,12 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddMinimum() { - return AddBuiltin(BuiltinOperator_MINIMUM, - tflite::ops::micro::Register_MINIMUM(), ParseMinimum); + return AddBuiltin(BuiltinOperator_MINIMUM, Register_MINIMUM(), + ParseMinimum); } - TfLiteStatus AddMul(const TfLiteRegistration& registration = Register_MUL()) { + TfLiteStatus AddMul( + const TfLiteRegistration_V1& registration = Register_MUL()) { return AddBuiltin(BuiltinOperator_MUL, registration, ParseMul); } @@ -394,16 +387,16 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddNotEqual() { - return AddBuiltin(BuiltinOperator_NOT_EQUAL, - tflite::ops::micro::Register_NOT_EQUAL(), ParseNotEqual); + return AddBuiltin(BuiltinOperator_NOT_EQUAL, Register_NOT_EQUAL(), + ParseNotEqual); } TfLiteStatus AddPack() { - return AddBuiltin(BuiltinOperator_PACK, tflite::ops::micro::Register_PACK(), - ParsePack); + return AddBuiltin(BuiltinOperator_PACK, Register_PACK(), ParsePack); } - TfLiteStatus AddPad(const TfLiteRegistration& registration = Register_PAD()) { + TfLiteStatus AddPad( + const TfLiteRegistration_V1& registration = Register_PAD()) { return AddBuiltin(BuiltinOperator_PAD, registration, ParsePad); } @@ -452,7 +445,7 @@ 
class MicroMutableOpResolver : public MicroOpResolver { TfLiteStatus AddResizeNearestNeighbor() { return AddBuiltin(BuiltinOperator_RESIZE_NEAREST_NEIGHBOR, - tflite::ops::micro::Register_RESIZE_NEAREST_NEIGHBOR(), + Register_RESIZE_NEAREST_NEIGHBOR(), ParseResizeNearestNeighbor); } @@ -462,8 +455,7 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddRsqrt() { - return AddBuiltin(BuiltinOperator_RSQRT, - tflite::ops::micro::Register_RSQRT(), ParseRsqrt); + return AddBuiltin(BuiltinOperator_RSQRT, Register_RSQRT(), ParseRsqrt); } TfLiteStatus AddSelectV2() { @@ -476,8 +468,7 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddSin() { - return AddBuiltin(BuiltinOperator_SIN, tflite::ops::micro::Register_SIN(), - ParseSin); + return AddBuiltin(BuiltinOperator_SIN, Register_SIN(), ParseSin); } TfLiteStatus AddSlice() { @@ -485,7 +476,7 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddSoftmax( - const TfLiteRegistration& registration = Register_SOFTMAX()) { + const TfLiteRegistration_V1& registration = Register_SOFTMAX()) { return AddBuiltin(BuiltinOperator_SOFTMAX, registration, ParseSoftmax); } @@ -500,13 +491,11 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddSplit() { - return AddBuiltin(BuiltinOperator_SPLIT, - tflite::ops::micro::Register_SPLIT(), ParseSplit); + return AddBuiltin(BuiltinOperator_SPLIT, Register_SPLIT(), ParseSplit); } TfLiteStatus AddSplitV() { - return AddBuiltin(BuiltinOperator_SPLIT_V, - tflite::ops::micro::Register_SPLIT_V(), ParseSplitV); + return AddBuiltin(BuiltinOperator_SPLIT_V, Register_SPLIT_V(), ParseSplitV); } TfLiteStatus AddSqueeze() { @@ -515,13 +504,11 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddSqrt() { - return AddBuiltin(BuiltinOperator_SQRT, tflite::ops::micro::Register_SQRT(), - ParseSqrt); + return AddBuiltin(BuiltinOperator_SQRT, Register_SQRT(), ParseSqrt); } TfLiteStatus AddSquare() { - return AddBuiltin(BuiltinOperator_SQUARE, - tflite::ops::micro::Register_SQUARE(), ParseSquare); + return AddBuiltin(BuiltinOperator_SQUARE, Register_SQUARE(), ParseSquare); } TfLiteStatus AddSquaredDifference() { @@ -531,8 +518,7 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddStridedSlice() { - return AddBuiltin(BuiltinOperator_STRIDED_SLICE, - tflite::ops::micro::Register_STRIDED_SLICE(), + return AddBuiltin(BuiltinOperator_STRIDED_SLICE, Register_STRIDED_SLICE(), ParseStridedSlice); } @@ -545,13 +531,12 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddSvdf( - const TfLiteRegistration& registration = Register_SVDF()) { + const TfLiteRegistration_V1& registration = Register_SVDF()) { return AddBuiltin(BuiltinOperator_SVDF, registration, ParseSvdf); } TfLiteStatus AddTanh() { - return AddBuiltin(BuiltinOperator_TANH, tflite::ops::micro::Register_TANH(), - ParseTanh); + return AddBuiltin(BuiltinOperator_TANH, Register_TANH(), ParseTanh); } TfLiteStatus AddTransposeConv() { @@ -565,14 +550,14 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddUnpack() { - return AddBuiltin(BuiltinOperator_UNPACK, - tflite::ops::micro::Register_UNPACK(), ParseUnpack); + return AddBuiltin(BuiltinOperator_UNPACK, Register_UNPACK(), ParseUnpack); } - TfLiteStatus AddUnidirectionalSequenceLSTM() { + TfLiteStatus AddUnidirectionalSequenceLSTM( + const TfLiteRegistration_V1& registration = + Register_UNIDIRECTIONAL_SEQUENCE_LSTM()) { return 
AddBuiltin(BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM, - Register_UNIDIRECTIONAL_SEQUENCE_LSTM(), - ParseUnidirectionalSequenceLSTM); + registration, ParseUnidirectionalSequenceLSTM); } TfLiteStatus AddVarHandle() { @@ -593,7 +578,7 @@ class MicroMutableOpResolver : public MicroOpResolver { private: TfLiteStatus AddBuiltin(tflite::BuiltinOperator op, - const TfLiteRegistration& registration, + const TfLiteRegistration_V1& registration, TfLiteBridgeBuiltinParseFunction parser) { if (op == BuiltinOperator_CUSTOM) { MicroPrintf("Invalid parameter BuiltinOperator_CUSTOM to the "); @@ -626,7 +611,7 @@ class MicroMutableOpResolver : public MicroOpResolver { return kTfLiteOk; } - TfLiteRegistration registrations_[tOpCount]; + TfLiteRegistration_V1 registrations_[tOpCount]; unsigned int registrations_len_ = 0; // Arrays (and counter) to store the builtin codes and their corresponding diff --git a/third_party/tflite-micro/tensorflow/lite/micro/micro_op_resolver.cc b/third_party/tflite-micro/tensorflow/lite/micro/micro_op_resolver.cc new file mode 100644 index 000000000..7463e5af4 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_op_resolver.cc @@ -0,0 +1,55 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/micro/micro_op_resolver.h" + +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/micro/micro_log.h" +#include "tensorflow/lite/schema/schema_utils.h" + +namespace tflite { + +TfLiteStatus GetRegistrationFromOpCode( + const OperatorCode* opcode, const MicroOpResolver& op_resolver, + const TfLiteRegistration_V1** registration) { + TfLiteStatus status = kTfLiteOk; + *registration = nullptr; + auto builtin_code = GetBuiltinCode(opcode); + + if (builtin_code > BuiltinOperator_MAX) { + MicroPrintf("Op builtin_code out of range: %d.", builtin_code); + status = kTfLiteError; + } else if (builtin_code != BuiltinOperator_CUSTOM) { + *registration = op_resolver.FindOp(builtin_code); + if (*registration == nullptr) { + MicroPrintf("Didn't find op for builtin opcode '%s'", + EnumNameBuiltinOperator(builtin_code)); + status = kTfLiteError; + } + } else if (!opcode->custom_code()) { + MicroPrintf("Operator with CUSTOM builtin_code has no custom_code.\n"); + status = kTfLiteError; + } else { + const char* name = opcode->custom_code()->c_str(); + *registration = op_resolver.FindOp(name); + if (*registration == nullptr) { + // Do not report error for unresolved custom op, we do the final check + // while preparing ops. 
+ status = kTfLiteError; + } + } + return status; +} +} // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/micro_op_resolver.h b/third_party/tflite-micro/tensorflow/lite/micro/micro_op_resolver.h index 02b073139..ed8b10e12 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/micro_op_resolver.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_op_resolver.h @@ -17,7 +17,6 @@ limitations under the License. #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/micro/tflite_bridge/flatbuffer_conversions_bridge.h" -#include "tensorflow/lite/micro/tflite_bridge/op_resolver_bridge.h" #include "tensorflow/lite/schema/schema_generated.h" namespace tflite { @@ -31,38 +30,32 @@ namespace tflite { // We need an interface class instead of directly using MicroMutableOpResolver // because MicroMutableOpResolver is a class template with the number of // registered Ops as the template parameter. -class MicroOpResolver : public TfLiteBridgeOpResolver { +class MicroOpResolver { public: // Returns the Op registration struct corresponding to the enum code from the // flatbuffer schema. Returns nullptr if the op is not found or if op == // BuiltinOperator_CUSTOM. - virtual const TfLiteRegistration* FindOp(BuiltinOperator op) const = 0; + virtual const TfLiteRegistration_V1* FindOp(BuiltinOperator op) const = 0; // Returns the Op registration struct corresponding to the custom operator by // name. - virtual const TfLiteRegistration* FindOp(const char* op) const = 0; - - // This implementation exists for compatibility with the OpResolver base class - // and disregards the version parameter. - const TfLiteRegistration* FindOp(BuiltinOperator op, - int version) const final { - return FindOp(op); - } - - // This implementation exists for compatibility with the OpResolver base class - // and disregards the version parameter. - const TfLiteRegistration* FindOp(const char* op, int version) const final { - return FindOp(op); - } + virtual const TfLiteRegistration_V1* FindOp(const char* op) const = 0; // Returns the operator specific parsing function for the OpData for a // BuiltinOperator (if registered), else nullptr. virtual TfLiteBridgeBuiltinParseFunction GetOpDataParser( BuiltinOperator op) const = 0; - ~MicroOpResolver() override {} + virtual ~MicroOpResolver() {} }; +// Handles the logic for converting between an OperatorCode structure extracted +// from a flatbuffer and information about a registered operator +// implementation. 
+TfLiteStatus GetRegistrationFromOpCode( + const OperatorCode* opcode, const MicroOpResolver& op_resolver, + const TfLiteRegistration_V1** registration); + } // namespace tflite #endif // TENSORFLOW_LITE_MICRO_MICRO_OP_RESOLVER_H_ diff --git a/third_party/tflite-micro/tensorflow/lite/micro/micro_profiler.cc b/third_party/tflite-micro/tensorflow/lite/micro/micro_profiler.cc index 9f6fc74c9..c3f0f4f1f 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/micro_profiler.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_profiler.cc @@ -26,7 +26,11 @@ namespace tflite { uint32_t MicroProfiler::BeginEvent(const char* tag) { if (num_events_ == kMaxEvents) { - num_events_ = 0; + MicroPrintf( + "MicroProfiler errored out because total number of events exceeded the " + "maximum of %d.", + kMaxEvents); + TFLITE_ASSERT_FALSE; } tags_[num_events_] = tag; @@ -52,8 +56,7 @@ void MicroProfiler::Log() const { #if !defined(TF_LITE_STRIP_ERROR_STRINGS) for (int i = 0; i < num_events_; ++i) { uint32_t ticks = end_ticks_[i] - start_ticks_[i]; - MicroPrintf("%s took %u ticks (%d ms).", tags_[i], ticks, - TicksToMs(ticks)); + MicroPrintf("%s took %u ticks (%d ms).", tags_[i], ticks, TicksToMs(ticks)); } #endif } @@ -63,7 +66,7 @@ void MicroProfiler::LogCsv() const { MicroPrintf("\"Event\",\"Tag\",\"Ticks\""); for (int i = 0; i < num_events_; ++i) { uint32_t ticks = end_ticks_[i] - start_ticks_[i]; - MicroPrintf("%d,%s,%u", i, tags_[i], ticks); + MicroPrintf("%d,%s,%" PRIu32, i, tags_[i], ticks); } #endif } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/micro_profiler.h b/third_party/tflite-micro/tensorflow/lite/micro/micro_profiler.h index d11364745..1c39ea1cb 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/micro_profiler.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_profiler.h @@ -69,7 +69,7 @@ class MicroProfiler : public MicroProfilerInterface { // Maximum number of events that this class can keep track of. If we call // AddEvent more than kMaxEvents number of times, then the oldest event's // profiling information will be overwritten. - static constexpr int kMaxEvents = 1024; + static constexpr int kMaxEvents = 4096; const char* tags_[kMaxEvents]; uint32_t start_ticks_[kMaxEvents]; diff --git a/third_party/tflite-micro/tensorflow/lite/micro/micro_resource_variable.cc b/third_party/tflite-micro/tensorflow/lite/micro/micro_resource_variable.cc index e6cea845b..767e7d17d 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/micro_resource_variable.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_resource_variable.cc @@ -63,6 +63,7 @@ int MicroResourceVariables::CreateIdIfNoneFound(const char* container, resource_variables_[resource_id].shared_name = shared_name; resource_variables_[resource_id].resource_buffer = nullptr; resource_variables_[resource_id].bytes = 0; + resource_variables_[resource_id].default_value = 0; return resource_id; } @@ -96,9 +97,17 @@ TfLiteStatus MicroResourceVariables::Allocate(int id, TfLiteContext* context, MicroPrintf("Failed to allocate resource buffer."); return kTfLiteError; } - // Zero out resource buffers by deafult. Buffers can be initialized to - // nonzero values using ASSIGN_VARIABLE. - memset(variable.resource_buffer, 0, variable.bytes); + // Set resource buffers to the zero_point by default. Buffers can be + // initialized to nonzero values using ASSIGN_VARIABLE. + // See comment#2 in b/269648474 for more details why we use zero_point. 
+ if (tensor->quantization.params != nullptr) { + auto* quantization_data = reinterpret_cast( + tensor->quantization.params); + int8_t zero_point = quantization_data->zero_point[0].data[0]; + variable.default_value = zero_point; + } + // TODO(b/269669735): Explains why casting zero_point to int8 and memset. + memset(variable.resource_buffer, variable.default_value, variable.bytes); } return kTfLiteOk; @@ -127,7 +136,8 @@ TfLiteStatus MicroResourceVariables::Assign(int id, TfLiteStatus MicroResourceVariables::ResetAll() { for (int i = 0; i < num_resource_variables_; i++) { MicroResourceVariable variable = resource_variables_[i]; - memset(variable.resource_buffer, 0, variable.bytes); + // TODO(b/269669735): Explains why casting zero_point to int8 and memset. + memset(variable.resource_buffer, variable.default_value, variable.bytes); } return kTfLiteOk; } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/micro_resource_variable.h b/third_party/tflite-micro/tensorflow/lite/micro/micro_resource_variable.h index e8df991c3..fb9917d47 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/micro_resource_variable.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_resource_variable.h @@ -69,6 +69,8 @@ class MicroResourceVariables { // This is only for verifying read size. size_t bytes; + // Initialization default value + int8_t default_value; }; MicroResourceVariables(MicroResourceVariable* variables, diff --git a/third_party/tflite-micro/tensorflow/lite/micro/recording_micro_allocator.cc b/third_party/tflite-micro/tensorflow/lite/micro/recording_micro_allocator.cc index f646d61a2..f41dba61d 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/recording_micro_allocator.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/recording_micro_allocator.cc @@ -1,4 +1,4 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -192,11 +192,12 @@ TfLiteStatus RecordingMicroAllocator::AllocateTfLiteEvalTensors( } TfLiteStatus RecordingMicroAllocator::AllocateVariables( - const SubGraph* subgraph, TfLiteEvalTensor* eval_tensors) { + const SubGraph* subgraph, TfLiteEvalTensor* eval_tensors, + const int32_t* offline_planner_offsets) { RecordedAllocation allocations = SnapshotAllocationUsage(); - TfLiteStatus status = - MicroAllocator::AllocateVariables(subgraph, eval_tensors); + TfLiteStatus status = MicroAllocator::AllocateVariables( + subgraph, eval_tensors, offline_planner_offsets); RecordAllocationUsage(allocations, recorded_tflite_tensor_variable_buffer_data_); diff --git a/third_party/tflite-micro/tensorflow/lite/micro/recording_micro_allocator.h b/third_party/tflite-micro/tensorflow/lite/micro/recording_micro_allocator.h index 3136fadea..b6f69264d 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/recording_micro_allocator.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/recording_micro_allocator.h @@ -1,4 +1,4 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -77,8 +77,9 @@ class RecordingMicroAllocator : public MicroAllocator { const Model* model, SubgraphAllocations* subgraph_allocations) override; TfLiteStatus AllocateTfLiteEvalTensors( const Model* model, SubgraphAllocations* subgraph_allocations) override; - TfLiteStatus AllocateVariables(const SubGraph* subgraph, - TfLiteEvalTensor* eval_tensors) override; + TfLiteStatus AllocateVariables( + const SubGraph* subgraph, TfLiteEvalTensor* eval_tensors, + const int32_t* offline_planner_offsets) override; // TODO(b/162311891): Once all kernels have been updated to the new API drop // this method. It is only used to record TfLiteTensor persistent allocations. TfLiteTensor* AllocatePersistentTfLiteTensorInternal() override; diff --git a/third_party/tflite-micro/tensorflow/lite/micro/test_helper_custom_ops.cc b/third_party/tflite-micro/tensorflow/lite/micro/test_helper_custom_ops.cc index 9423242ae..f9bd21681 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/test_helper_custom_ops.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/test_helper_custom_ops.cc @@ -36,12 +36,12 @@ limitations under the License. namespace tflite { namespace testing { -const TfLiteRegistration* PackerOp::getRegistration() { +const TfLiteRegistration_V1* PackerOp::getRegistration() { return GetMutableRegistration(); } -TfLiteRegistration* PackerOp::GetMutableRegistration() { - static TfLiteRegistration r; +TfLiteRegistration_V1* PackerOp::GetMutableRegistration() { + static TfLiteRegistration_V1 r; r.init = Init; r.prepare = Prepare; r.invoke = Invoke; diff --git a/third_party/tflite-micro/tensorflow/lite/micro/test_helper_custom_ops.h b/third_party/tflite-micro/tensorflow/lite/micro/test_helper_custom_ops.h index b8c025a71..7db75d8d2 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/test_helper_custom_ops.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/test_helper_custom_ops.h @@ -33,8 +33,8 @@ namespace testing { class PackerOp { public: - static const TfLiteRegistration* getRegistration(); - static TfLiteRegistration* GetMutableRegistration(); + static const TfLiteRegistration_V1* getRegistration(); + static TfLiteRegistration_V1* GetMutableRegistration(); static void* Init(TfLiteContext* context, const char* buffer, size_t length); static void Free(TfLiteContext* context, void* buffer); static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node); diff --git a/third_party/tflite-micro/tensorflow/lite/micro/test_helpers.cc b/third_party/tflite-micro/tensorflow/lite/micro/test_helpers.cc index 594b6129f..8a7e1cb42 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/test_helpers.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/test_helpers.cc @@ -1431,12 +1431,12 @@ const Model* BuildSimpleMockModelWithNullInputsOutputs() { } // namespace -const TfLiteRegistration* SimpleStatefulOp::getRegistration() { +const TfLiteRegistration_V1* SimpleStatefulOp::getRegistration() { return GetMutableRegistration(); } -TfLiteRegistration* SimpleStatefulOp::GetMutableRegistration() { - static TfLiteRegistration r; +TfLiteRegistration_V1* SimpleStatefulOp::GetMutableRegistration() { + static TfLiteRegistration_V1 r; r.init = Init; r.prepare = Prepare; r.invoke = Invoke; @@ -1445,10 +1445,6 @@ TfLiteRegistration* SimpleStatefulOp::GetMutableRegistration() { void* SimpleStatefulOp::Init(TfLiteContext* context, const char* buffer, size_t length) { - TFLITE_DCHECK(context->AllocateBufferForEval == nullptr); - TFLITE_DCHECK(context->GetScratchBuffer == nullptr); - 
TFLITE_DCHECK(context->RequestScratchBufferInArena == nullptr); - void* raw = context->AllocatePersistentBuffer(context, sizeof(OpData)); OpData* data = reinterpret_cast(raw); *data = {}; @@ -1521,12 +1517,12 @@ TfLiteStatus SimpleStatefulOp::Invoke(TfLiteContext* context, return kTfLiteOk; } -const TfLiteRegistration* MockCustom::getRegistration() { +const TfLiteRegistration_V1* MockCustom::getRegistration() { return GetMutableRegistration(); } -TfLiteRegistration* MockCustom::GetMutableRegistration() { - static TfLiteRegistration r; +TfLiteRegistration_V1* MockCustom::GetMutableRegistration() { + static TfLiteRegistration_V1 r; r.init = Init; r.prepare = Prepare; r.invoke = Invoke; @@ -1569,12 +1565,12 @@ TfLiteStatus MockCustom::Invoke(TfLiteContext* context, TfLiteNode* node) { bool MockCustom::freed_ = false; -const TfLiteRegistration* MultipleInputs::getRegistration() { +const TfLiteRegistration_V1* MultipleInputs::getRegistration() { return GetMutableRegistration(); } -TfLiteRegistration* MultipleInputs::GetMutableRegistration() { - static TfLiteRegistration r; +TfLiteRegistration_V1* MultipleInputs::GetMutableRegistration() { + static TfLiteRegistration_V1 r; r.init = Init; r.prepare = Prepare; r.invoke = Invoke; @@ -1624,12 +1620,12 @@ TfLiteStatus MultipleInputs::Invoke(TfLiteContext* context, TfLiteNode* node) { bool MultipleInputs::freed_ = false; -const TfLiteRegistration* NoOp::getRegistration() { +const TfLiteRegistration_V1* NoOp::getRegistration() { return GetMutableRegistration(); } -TfLiteRegistration* NoOp::GetMutableRegistration() { - static TfLiteRegistration r; +TfLiteRegistration_V1* NoOp::GetMutableRegistration() { + static TfLiteRegistration_V1 r; r.init = Init; r.prepare = Prepare; r.invoke = Invoke; diff --git a/third_party/tflite-micro/tensorflow/lite/micro/test_helpers.h b/third_party/tflite-micro/tensorflow/lite/micro/test_helpers.h index 2c1c6b481..52f40a613 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/test_helpers.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/test_helpers.h @@ -55,8 +55,8 @@ class SimpleStatefulOp { }; public: - static const TfLiteRegistration* getRegistration(); - static TfLiteRegistration* GetMutableRegistration(); + static const TfLiteRegistration_V1* getRegistration(); + static TfLiteRegistration_V1* GetMutableRegistration(); static void* Init(TfLiteContext* context, const char* buffer, size_t length); static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node); static TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node); @@ -64,8 +64,8 @@ class SimpleStatefulOp { class MockCustom { public: - static const TfLiteRegistration* getRegistration(); - static TfLiteRegistration* GetMutableRegistration(); + static const TfLiteRegistration_V1* getRegistration(); + static TfLiteRegistration_V1* GetMutableRegistration(); static void* Init(TfLiteContext* context, const char* buffer, size_t length); static void Free(TfLiteContext* context, void* buffer); static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node); @@ -78,8 +78,8 @@ class MockCustom { // the sum of the inputs. 
class MultipleInputs { public: - static const TfLiteRegistration* getRegistration(); - static TfLiteRegistration* GetMutableRegistration(); + static const TfLiteRegistration_V1* getRegistration(); + static TfLiteRegistration_V1* GetMutableRegistration(); static void* Init(TfLiteContext* context, const char* buffer, size_t length); static void Free(TfLiteContext* context, void* buffer); static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node); @@ -91,8 +91,8 @@ class MultipleInputs { // A simple no-op operator. class NoOp { public: - static const TfLiteRegistration* getRegistration(); - static TfLiteRegistration* GetMutableRegistration(); + static const TfLiteRegistration_V1* getRegistration(); + static TfLiteRegistration_V1* GetMutableRegistration(); static void* Init(TfLiteContext* context, const char* buffer, size_t length); static void Free(TfLiteContext* context, void* buffer); static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node); @@ -216,7 +216,6 @@ TfLiteTensor CreateTensor(const T* data, TfLiteIntArray* dims, result.is_variable = is_variable; result.allocation_type = kTfLiteMemNone; result.data.data = const_cast(data); - result.quantization = {kTfLiteAffineQuantization, nullptr}; result.bytes = ElementCount(*dims) * sizeof(T); result.data.data = const_cast(data); diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/BUILD b/third_party/tflite-micro/tensorflow/lite/micro/tools/BUILD index c0046847f..10ea6f06a 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/BUILD +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/BUILD @@ -1,3 +1,5 @@ +load("@tflm_pip_deps//:requirements.bzl", "requirement") + package( default_visibility = ["//:__subpackages__"], licenses = ["notice"], @@ -22,6 +24,38 @@ py_binary( srcs = ["generate_cc_arrays.py"], ) +py_binary( + name = "requantize_flatbuffer", + srcs = [ + "requantize_flatbuffer.py", + "requantize_flatbuffer_utils.py", + ], + srcs_version = "PY3", + deps = [ + "//tensorflow/lite/python:schema_py", + "//tensorflow/lite/tools:flatbuffer_utils", + "@absl_py//absl:app", + ], +) + +py_test( + name = "requantize_flatbuffer_test", + srcs = ["requantize_flatbuffer_test.py"], + main = "requantize_flatbuffer_test.py", + python_version = "PY3", + tags = [ + "noasan", + "nomsan", # Python doesn't like these symbols from interpreter_wrapper_pybind.so + "noubsan", + ], + deps = [ + ":requantize_flatbuffer", + "//tensorflow/lite/micro/python/interpreter/src:tflm_runtime", + requirement("numpy"), + requirement("tensorflow-cpu"), + ], +) + cc_binary( name = "tflite_flatbuffer_align", srcs = [ diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bazel.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bazel.sh index 89d2bdece..b76ba6e26 100755 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bazel.sh +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bazel.sh @@ -26,28 +26,12 @@ source tensorflow/lite/micro/tools/ci_build/helper_functions.sh # covers non-test binary targets as well. These were previousbly covered by # having build_test but that was removed with #194. -CC=clang readable_run bazel build tensorflow/lite/micro/... \ +CC=clang readable_run bazel build ... \ --build_tag_filters=-no_oss -CC=clang readable_run bazel test tensorflow/lite/micro/... \ +CC=clang readable_run bazel test ... 
\ --test_tag_filters=-no_oss --build_tag_filters=-no_oss \ --test_output=errors -CC=clang readable_run bazel build tensorflow/lite/micro/... \ - --config=msan --build_tag_filters=-no_oss,-nomsan -CC=clang readable_run bazel test tensorflow/lite/micro/... \ - --config=msan \ - --test_tag_filters=-no_oss,-nomsan --build_tag_filters=-no_oss,-nomsan \ - --test_output=errors - -CC=clang readable_run bazel build tensorflow/lite/micro/... \ - --config=asan --build_tag_filters=-no_oss,-noasan -CC=clang readable_run bazel test tensorflow/lite/micro/... \ - --config=asan \ - --test_tag_filters=-no_oss,-noasan --build_tag_filters=-no_oss,-noasan \ - --test_output=errors - # TODO(b/178621680): enable ubsan once bazel + clang + ubsan errors are fixed. #CC=clang readable_run bazel test tensorflow/lite/micro/... --config=ubsan --test_tag_filters=-no_oss,-noubsan --build_tag_filters=-no_oss,-noubsan -readable_run bazel test tensorflow/lite/tools/... \ - --test_output=errors diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bazel_asan.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bazel_asan.sh new file mode 100755 index 000000000..9e025f523 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bazel_asan.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ROOT_DIR=${SCRIPT_DIR}/../../../../.. +cd "${ROOT_DIR}" + +source tensorflow/lite/micro/tools/ci_build/helper_functions.sh + +# We are using a bazel build followed by bazel test to make sure that the CI +# covers non-test binary targets as well. These were previousbly covered by +# having build_test but that was removed with #194. + +CC=clang readable_run bazel build tensorflow/lite/micro/... \ + --config=asan --build_tag_filters=-no_oss,-noasan +CC=clang readable_run bazel test tensorflow/lite/micro/... \ + --config=asan \ + --test_tag_filters=-no_oss,-noasan --build_tag_filters=-no_oss,-noasan \ + --test_output=errors diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bazel_msan.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bazel_msan.sh new file mode 100755 index 000000000..a0b355a08 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bazel_msan.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ROOT_DIR=${SCRIPT_DIR}/../../../../.. +cd "${ROOT_DIR}" + +source tensorflow/lite/micro/tools/ci_build/helper_functions.sh + +# We are using a bazel build followed by bazel test to make sure that the CI +# covers non-test binary targets as well. These were previousbly covered by +# having build_test but that was removed with #194. + +CC=clang readable_run bazel build tensorflow/lite/micro/... \ + --config=msan --build_tag_filters=-no_oss,-nomsan +CC=clang readable_run bazel test tensorflow/lite/micro/... \ + --config=msan \ + --test_tag_filters=-no_oss,-nomsan --build_tag_filters=-no_oss,-nomsan \ + --test_output=errors diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bazel_tflite_tools.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bazel_tflite_tools.sh new file mode 100755 index 000000000..9556cffd8 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bazel_tflite_tools.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ROOT_DIR=${SCRIPT_DIR}/../../../../.. +cd "${ROOT_DIR}" + +source tensorflow/lite/micro/tools/ci_build/helper_functions.sh + +readable_run bazel test tensorflow/lite/tools/... \ + --test_output=errors diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bluepill_no_release.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bluepill_no_release.sh new file mode 100755 index 000000000..e4922f073 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bluepill_no_release.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# Called with following arguments: +# 1 - (optional) TENSORFLOW_ROOT: path to root of the TFLM tree (relative to directory from where the script is called). +# 2 - (optional) EXTERNAL_DIR: Path to the external directory that contains external code +# Tests the microcontroller code for bluepill + +set -e + +TARGET=bluepill +OPTIMIZED_KERNEL_DIR=cmsis_nn +TENSORFLOW_ROOT=${1} +EXTERNAL_DIR=${2} + +source ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/ci_build/helper_functions.sh + +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# TODO(b/143715361): downloading first to allow for parallel builds. +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile OPTIMIZED_KERNEL_DIR=${OPTIMIZED_KERNEL_DIR} TARGET=${TARGET} third_party_downloads TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# Build w/o release so that we can run the tests and get additional +# debugging info on failures. +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} +readable_run make -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile OPTIMIZED_KERNEL_DIR=${OPTIMIZED_KERNEL_DIR} TARGET=${TARGET} build TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} +readable_run make -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile OPTIMIZED_KERNEL_DIR=${OPTIMIZED_KERNEL_DIR} TARGET=${TARGET} test TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bluepill_release.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bluepill_release.sh new file mode 100755 index 000000000..9ec781c14 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bluepill_release.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# Called with following arguments: +# 1 - (optional) TENSORFLOW_ROOT: path to root of the TFLM tree (relative to directory from where the script is called). 
+# 2 - (optional) EXTERNAL_DIR: Path to the external directory that contains external code +# Tests the microcontroller code for bluepill + +set -e + +TARGET=bluepill +OPTIMIZED_KERNEL_DIR=cmsis_nn +TENSORFLOW_ROOT=${1} +EXTERNAL_DIR=${2} + +source ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/ci_build/helper_functions.sh + +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# TODO(b/143715361): downloading first to allow for parallel builds. +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile OPTIMIZED_KERNEL_DIR=${OPTIMIZED_KERNEL_DIR} TARGET=${TARGET} third_party_downloads TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# Make sure that the release build succeeds. +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} +readable_run make -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile BUILD_TYPE=release OPTIMIZED_KERNEL_DIR=${OPTIMIZED_KERNEL_DIR} TARGET=${TARGET} build TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bluepill_renode.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bluepill_renode.sh new file mode 100755 index 000000000..ec7a68f4e --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bluepill_renode.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# Called with following arguments: +# 1 - (optional) TENSORFLOW_ROOT: path to root of the TFLM tree (relative to directory from where the script is called). +# 2 - (optional) EXTERNAL_DIR: Path to the external directory that contains external code +# Tests the microcontroller code for bluepill platform + +set -e +pwd + +TENSORFLOW_ROOT=${1} +EXTERNAL_DIR=${2} + +source ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/ci_build/helper_functions.sh + +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +TARGET=bluepill + +# TODO(b/143715361): downloading first to allow for parallel builds. +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile TARGET=${TARGET} third_party_downloads TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# We use Renode differently when running the full test suite (make test) vs an +# individual test. So, we test only of the kernels individually as well to have +# both of the Renode variations be part of the CI. 
+readable_run make -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile TARGET=${TARGET} test_kernel_add_test TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_code_style.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_code_style.sh index 4c1ab4efa..81ca2c65e 100755 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_code_style.sh +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_code_style.sh @@ -45,6 +45,7 @@ tensorflow/lite/micro/tools/make/downloads/pigweed/pw_presubmit/py/pw_presubmit/ -e kernels/internal/reference/integer_ops/ \ -e kernels/internal/reference/reference_ops.h \ -e python/schema_py_generated.py \ + -e python_requirements.in \ -e tools/make/downloads \ -e tools/make/targets/ecm3531 \ -e BUILD\ diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_project_generation.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_project_generation.sh index 5fdd87bae..8814a60e9 100755 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_project_generation.sh +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_project_generation.sh @@ -52,7 +52,7 @@ source ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/ci_build/helper_functions.s # Next, we test that create_tflm_tree can be used to build example binaries. We # perform this test with a Makefile (instead of bazel) because make is more # commonly understood and because we use make for cross-compilation. -EXAMPLES="-e hello_world -e magic_wand -e micro_speech -e person_detection" +EXAMPLES="-e hello_world -e micro_speech -e person_detection" TEST_OUTPUT_DIR="$(mktemp -d)" diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_x86_default.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_x86_default.sh new file mode 100755 index 000000000..623238ed0 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_x86_default.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# +# Called with following arguments: +# 1 - (optional) TENSORFLOW_ROOT: path to root of the TFLM tree (relative to directory from where the script is called). +# 2 - (optional) EXTERNAL_DIR: Path to the external directory that contains external code +# Tests the microcontroller code using native x86 execution. +# +# This file is a subset of the tests in test_x86.sh. It is for parallelizing the test +# suite on github actions. 
+ +set -e + +TENSORFLOW_ROOT=${1} +EXTERNAL_DIR=${2} + +source ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/ci_build/helper_functions.sh + +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# TODO(b/143715361): downloading first to allow for parallel builds. +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile third_party_downloads TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# Build w/o release so that we can run the tests and get additional +# debugging info on failures. +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} +readable_run make -s -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile build TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} +readable_run make -s -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile test TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} +readable_run make -s -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile integration_tests TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_x86_no_tflite_static_memory.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_x86_no_tflite_static_memory.sh new file mode 100755 index 000000000..9d63a2608 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_x86_no_tflite_static_memory.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# +# Called with following arguments: +# 1 - (optional) TENSORFLOW_ROOT: path to root of the TFLM tree (relative to directory from where the script is called). +# 2 - (optional) EXTERNAL_DIR: Path to the external directory that contains external code +# Tests the microcontroller code using native x86 execution. +# +# This file is a subset of the tests in test_x86.sh. It is for parallelizing the test +# suite on github actions. + +set -e + +TENSORFLOW_ROOT=${1} +EXTERNAL_DIR=${2} + +source ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/ci_build/helper_functions.sh + +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# TODO(b/143715361): downloading first to allow for parallel builds. +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile third_party_downloads TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# Build w/o TF_LITE_STATIC_MEMORY to catch additional errors. +# TODO(b/160955687): We run the tests w/o TF_LITE_STATIC_MEMORY to make the +# internal and open source CI consistent. 
See b/160955687#comment7 for more +# details. +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} +readable_run make -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile BUILD_TYPE=no_tf_lite_static_memory test TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_x86_out_of_tree.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_x86_out_of_tree.sh new file mode 100755 index 000000000..6a0213610 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_x86_out_of_tree.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# +# Called with following arguments: +# 1 - (optional) TENSORFLOW_ROOT: path to root of the TFLM tree (relative to directory from where the script is called). +# 2 - (optional) EXTERNAL_DIR: Path to the external directory that contains external code +# Tests the microcontroller code using native x86 execution. +# +# This file is a subset of the tests in test_x86.sh. It is for parallelizing the test +# suite on github actions. + +set -e + +TENSORFLOW_ROOT=${1} +EXTERNAL_DIR=${2} + +source ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/ci_build/helper_functions.sh + +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# TODO(b/143715361): downloading first to allow for parallel builds. +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile third_party_downloads TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# Test the hello_world as an example outside of the github repo. 
+readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} +cp -r ${TENSORFLOW_ROOT}tensorflow/lite/micro/examples/hello_world ./ +sed -i 's/tensorflow\/lite\/micro\/examples\///g' hello_world/Makefile.inc +sed -i 's/$(TENSORFLOW_ROOT)//g' hello_world/Makefile.inc +mv hello_world/Makefile.inc hello_world/Makefile_internal.inc +sed -i 's/tensorflow\/lite\/micro\/examples\///g' hello_world/evaluate_test.cc +readable_run make -s -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile test_evaluate_cc_test TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=hello_world/ +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=hello_world/ +rm -rf hello_world \ No newline at end of file diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_x86_release.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_x86_release.sh new file mode 100755 index 000000000..ec96f99c9 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_x86_release.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# +# Called with following arguments: +# 1 - (optional) TENSORFLOW_ROOT: path to root of the TFLM tree (relative to directory from where the script is called). +# 2 - (optional) EXTERNAL_DIR: Path to the external directory that contains external code +# Tests the microcontroller code using native x86 execution. +# +# This file is a subset of the tests in test_x86.sh. It is for parallelizing the test +# suite on github actions. + +set -e + +TENSORFLOW_ROOT=${1} +EXTERNAL_DIR=${2} + +source ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/ci_build/helper_functions.sh + +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# TODO(b/143715361): downloading first to allow for parallel builds. +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile third_party_downloads TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# Build with release and logs so that we can run the tests and get +# additional debugging info on failures. 
+readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} +readable_run make -s -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile BUILD_TYPE=release_with_logs build TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} +readable_run make -s -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile BUILD_TYPE=release_with_logs test TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} +readable_run make -s -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile BUILD_TYPE=release_with_logs integration_tests TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# Next, make sure that the release build succeeds. +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} +readable_run make -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile BUILD_TYPE=release build TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} \ No newline at end of file diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifimini.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifimini.sh new file mode 100755 index 000000000..abfe651c6 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifimini.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# Called with following arguments: +# 1 - EXTERNAL or INTERNAL to signal how to run the script +# 2 - (optional) TENSORFLOW_ROOT: path to root of the TFLM tree (relative to directory from where the script is called). +# 3 - (optional) EXTERNAL_DIR: Path to the external directory that contains external code + +set -e +pwd + +TENSORFLOW_ROOT=${1} +EXTERNAL_DIR=${2} + +source ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/ci_build/helper_functions.sh + +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# TODO(b/143904317): downloading first to allow for parallel builds. 
+readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile third_party_downloads TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile \ + TARGET=xtensa \ + TARGET_ARCH=hifimini \ + OPTIMIZED_KERNEL_DIR=xtensa \ + XTENSA_CORE=mini1m1m_RG \ + TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \ + EXTERNAL_DIR=${EXTERNAL_DIR} \ + build -j$(nproc) + +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile \ + TARGET=xtensa \ + TARGET_ARCH=hifimini \ + OPTIMIZED_KERNEL_DIR=xtensa \ + XTENSA_CORE=mini1m1m_RG \ + TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \ + EXTERNAL_DIR=${EXTERNAL_DIR} \ + test -j$(nproc) \ No newline at end of file diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver/README.md b/third_party/tflite-micro/tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver/README.md index 3aa7ccd7e..573d69fd4 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver/README.md +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver/README.md @@ -12,7 +12,7 @@ bazel run tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver:generate_mic Note that if having only one tflite as input, the final output directory will be /. -Example1: +Example: ``` bazel run tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver:generate_micro_mutable_op_resolver_from_model -- \ @@ -22,7 +22,7 @@ bazel run tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver:generate_mic A header file called, gen_micro_mutable_op_resolver.h will be created in /tmp/gen_dir/person_detect. -Example2: +Example: ``` bazel run tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver:generate_micro_mutable_op_resolver_from_model -- \ @@ -69,27 +69,31 @@ bazel run gen_dir/person_detect:micro_mutable_op_resolver_test By default the model will run without any generated input or verifying the output. This can be done by adding the flag --verify_output=1. -Example: +Example assuming gen_dir and /tmp/my_model.tflite exists: ``` bazel run tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver:generate_micro_mutable_op_resolver_from_model -- \ - --input_tflite_files=/tmp/my_model.tflite --output_dir=$(realpath gen_dir) + --common_tflite_path=/tmp/ \ + --input_tflite_files=my_model.tflite --output_dir=$(realpath gen_dir/my_model) bazel run tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver:generate_micro_mutable_op_resolver_from_model_test -- \ - --input_tflite_files=/tmp/my_model.tflite --output_dir=$(realpath gen_dir) --verify_output=1 + --input_tflite_file=/tmp/my_model.tflite --output_dir=$(realpath gen_dir) --verify_output=1 bazel run gen_dir/my_model:micro_mutable_op_resolver_test ``` +Note that since test script appends the name of the model in the output directory, we add that to the output directory for the generated header (gen_dir/my_model) so that header and test files ends up in same directory. + Depending on the size of the input model the arena size may need to be increased. Arena size can be set with --arena_size=. 
-Example: +Example assuming gen_dir and /tmp/big_model.tflite exists: ``` bazel run tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver:generate_micro_mutable_op_resolver_from_model -- \ - --input_tflite_files=/tmp/big_model.tflite --output_dir=gen_dir + --common_tflite_path=/tmp/ \ + --input_tflite_files=big_model.tflite --output_dir=$(realpath gen_dir/big_model) bazel run tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver:generate_micro_mutable_op_resolver_from_model_test -- \ - --input_tflite_files=/tmp/big_model.tflite --output_dir=gen_dir --verify_output=1 --arena_size=1000000 -bazel run tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver/generated/big_model:micro_mutable_op_resolver_test + --input_tflite_file=/tmp/big_model.tflite --output_dir=$(realpath gen_dir) --verify_output=1 --arena_size=1000000 +bazel run gen_dir/big_model:micro_mutable_op_resolver_test ``` diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver/templates/micro_mutable_op_resolver_test.cc.mako b/third_party/tflite-micro/tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver/templates/micro_mutable_op_resolver_test.cc.mako index 68176c566..3de4ef406 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver/templates/micro_mutable_op_resolver_test.cc.mako +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver/templates/micro_mutable_op_resolver_test.cc.mako @@ -75,7 +75,7 @@ void RunModel(const uint8_t* model, TfLiteTensor* output_tensor = interpreter.output(0); TF_LITE_MICRO_EXPECT_EQ(output_tensor->bytes, golden_size * sizeof(int8_t)); - int8_t* output = GetTensorData(output_tensor); + int8_t* output = ::tflite::GetTensorData(output_tensor); for (uint32_t i = 0; i < golden_size; i++) { // TODO(b/205046520): Better understand why TfLite and TFLM can sometimes be // off by 1. 
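The hunk that follows refactors generate_cc_arrays.py so each input file is read in a single call and converted through a shared helper, instead of looping one byte at a time. A minimal standalone sketch of that helper's behavior (assuming Python 3 bytes objects; this mirrors, but is not, the exact code in the hunk):

```python
def bytes_to_hexstring(buffer):
  """Convert a byte array to a comma-separated string of hex literals."""
  return ','.join(hex(b) for b in buffer)

# The same formatting ends up in the generated .cc arrays:
assert bytes_to_hexstring(b'\x01\x02\xff') == '0x1,0x2,0xff'
```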
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/generate_cc_arrays.py b/third_party/tflite-micro/tensorflow/lite/micro/tools/generate_cc_arrays.py index 41645f1f4..d5edbf068 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/generate_cc_arrays.py +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/generate_cc_arrays.py @@ -53,25 +53,27 @@ def generate_file(out_fname, array_name, array_type, array_contents, size): raise ValueError('generated file must be end with .cc or .h') +def bytes_to_hexstring(buffer): + """Convert a byte array to a hex string.""" + hex_values = [hex(buffer[i]) for i in range(len(buffer))] + out_string = ','.join(hex_values) + return out_string + + def generate_array(input_fname): """Return array size and array of data from the input file.""" if input_fname.endswith('.tflite'): with open(input_fname, 'rb') as input_file: - out_string = '' - byte = input_file.read(1) - size = 0 - while byte: - out_string += '0x' + byte.hex() + ',' - byte = input_file.read(1) - size += 1 - return [size, out_string] + buffer = input_file.read() + size = len(buffer) + out_string = bytes_to_hexstring(buffer) + return [size, out_string] elif input_fname.endswith('.bmp'): img = Image.open(input_fname, mode='r') image_bytes = img.tobytes() - out_string = '' - for byte in image_bytes: - out_string += hex(byte) + ',' - return [len(image_bytes), out_string] + size = len(image_bytes) + out_string = bytes_to_hexstring(image_bytes) + return [size, out_string] elif input_fname.endswith('.wav'): wav_file = wave.open(input_fname, mode='r') num_channels = wav_file.getnchannels() diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/Makefile b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/Makefile index 25c234d75..61d0e7fd6 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/Makefile +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/Makefile @@ -1,4 +1,4 @@ -# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -267,8 +267,8 @@ endif # Kernel integration tests must be excluded on certain targets. 
MICRO_LITE_INTEGRATION_TESTS += $(shell find $(TENSORFLOW_ROOT)tensorflow/lite/micro/integration_tests -name Makefile.inc) -MICRO_LITE_GEN_MUTABLE_OP_RESOLVER_TEST += $(shell find \ -$(TENSORFLOW_ROOT)tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver_test/person_detect -name Makefile.inc) +MICRO_LITE_GEN_MUTABLE_OP_RESOLVER_TEST += \ + $(wildcard $(TENSORFLOW_ROOT)tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver_test/person_detect/Makefile.inc) MICRO_LITE_BENCHMARKS := $(wildcard $(TENSORFLOW_ROOT)tensorflow/lite/micro/benchmarks/Makefile.inc) @@ -357,6 +357,7 @@ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/logistic.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/logistic_common.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/log_softmax.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/lstm_eval.cc \ +$(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/lstm_eval_common.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/maximum_minimum.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/micro_tensor_utils.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/mirror_pad.cc \ diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/cmsis_nn_download.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/cmsis_nn_download.sh index 35fa6c7c8..9a5e4b703 100755 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/cmsis_nn_download.sh +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/cmsis_nn_download.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -47,9 +47,9 @@ if [ -d ${DOWNLOADED_CMSIS_NN_PATH} ]; then echo >&2 "${DOWNLOADED_CMSIS_NN_PATH} already exists, skipping the download." else - ZIP_PREFIX_NN="e98ee09a03dd12d4b3eac6f7efa25d3ad62a24b9" + ZIP_PREFIX_NN="d071e9f70195559e7242709b8df3adeb7c50d0fb" CMSIS_NN_URL="http://github.com/ARM-software/CMSIS-NN/archive/${ZIP_PREFIX_NN}.zip" - CMSIS_NN_MD5="a0e4b5f2c5c62405c304c7ffcc64af3b" + CMSIS_NN_MD5="0364a1a83f86a5104b893a4d21df7874" # wget is much faster than git clone of the entire repo. So we wget a specific # version and can then apply a patch, as needed. 
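The cmsis_nn_download.sh change above pins a newer CMSIS-NN archive (ZIP_PREFIX_NN) together with its MD5 checksum, which the script presumably uses to validate the download before unpacking. A hypothetical standalone check of the same kind (helper name and archive path are illustrative only, not part of the repo):

```python
import hashlib

def md5_matches(archive_path, expected_md5):
  """Return True if the file's MD5 digest equals the pinned checksum."""
  digest = hashlib.md5()
  with open(archive_path, 'rb') as f:
    for chunk in iter(lambda: f.read(8192), b''):
      digest.update(chunk)
  return digest.hexdigest() == expected_md5

# Values taken from the hunk above:
# md5_matches('d071e9f70195559e7242709b8df3adeb7c50d0fb.zip',
#             '0364a1a83f86a5104b893a4d21df7874')
```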
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/xtensa.inc b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/xtensa.inc index 20ee1e4e0..3b282676a 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/xtensa.inc +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/xtensa.inc @@ -8,15 +8,24 @@ MICROLITE_CC_KERNEL_SRCS += \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/conv_vision.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/depthwise_conv_hifi.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/depthwise_conv_vision.cc \ + $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/fully_connected_common_xtensa.cc \ + $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/fully_connected_int8.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/fully_connected_vision.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/pad_vision.cc \ + $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/pooling_int8.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/pooling_vision.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/reduce_vision.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/reshape_vision.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/softmax_int8_int16.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/softmax_vision.cc -ifeq ($(TARGET_ARCH), hifi5) +ifeq ($(TARGET_ARCH), hifimini) + # hifimini optimizations are implemented in the TFLM repository itself. + THIRD_PARTY_KERNEL_CC_SRCS += \ + $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/hifimini/svdf.cc \ + $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/hifimini/fully_connected.cc + +else ifeq ($(TARGET_ARCH), hifi5) DOWNLOAD_RESULT := $(shell $(MAKEFILE_DIR)/ext_libs/xtensa_download.sh ${DOWNLOADS_DIR} hifi5 $(TENSORFLOW_ROOT)) ifneq ($(DOWNLOAD_RESULT), SUCCESS) $(error Something went wrong with the xtensa download: $(DOWNLOAD_RESULT)) @@ -124,7 +133,7 @@ else ifeq ($(TARGET_ARCH), vision_p6) INCLUDES += \ -I$(NNLIB_PATH)/flk/include \ -I$(NNLIB_PATH)/kernels/include/ \ - -I$(NNLIB_PATH)/runtime/include/ + -I$(NNLIB_PATH)/runtime/include/ LDFLAGS += -lidma else @@ -141,4 +150,10 @@ THIRD_PARTY_KERNEL_CC_SRCS += \ THIRD_PARTY_CC_HDRS += \ $(shell find $(FFT_PATH)/hifi3_fft -name "*.h") +else ifeq ($(TARGET_ARCH), hifimini) +THIRD_PARTY_KERNEL_CC_SRCS += \ + $(shell find $(FFT_PATH)/hifi2_fft -name "*.c") + +THIRD_PARTY_CC_HDRS += \ + $(shell find $(FFT_PATH)/hifi2_fft -name "*.h") endif diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/helper_functions.inc b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/helper_functions.inc index 2325aa121..ad3d44c45 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/helper_functions.inc +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/helper_functions.inc @@ -69,8 +69,16 @@ else MICROLITE_TEST_TARGETS += test_$(1) endif -test_$(1): $$($(1)_BINARY) +# For bluepill, the CI build is failing due to introduction of the +# introduction of test_run_latency.sh script. Looks at +# https://b.corp.google.com/issues/268565399#comment11 for more details. 
+ifneq ($(TARGET), bluepill) +test_$(1):$$($(1)_BINARY) + $(MAKEFILE_DIR)/test_latency_log.sh $(1) $$(TEST_SCRIPT) $$($(1)_BINARY) $$(TEST_PASS_STRING) $$(TARGET) +else +test_$(1):$$($(1)_BINARY) $$(TEST_SCRIPT) $$($(1)_BINARY) $$(TEST_PASS_STRING) $$(TARGET) +endif else run_$(1): $$($(1)_BINARY) diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/renode_download.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/renode_download.sh index cbeba8930..f78038785 100755 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/renode_download.sh +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/renode_download.sh @@ -60,11 +60,6 @@ else echo >&2 "Unpacked to directory: ${DOWNLOADED_RENODE_PATH}" pip3 install -r ${DOWNLOADED_RENODE_PATH}/tests/requirements.txt >&2 - - pushd ${DOWNLOADED_RENODE_PATH} > /dev/null - create_git_repo ./ - apply_patch_to_folder ./ ../../renode.patch "TFLM patch" - popd > /dev/null fi echo "SUCCESS" diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/targets/arc_custom_makefile.inc b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/targets/arc_custom_makefile.inc index 1e6287d76..84e2d030e 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/targets/arc_custom_makefile.inc +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/targets/arc_custom_makefile.inc @@ -62,7 +62,6 @@ MICROLITE_TEST_SRCS := $(filter-out $(EXCLUDED_TESTS), $(MICROLITE_TEST_SRCS)) EXCLUDED_EXAMPLE_TESTS := \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/hello_world/Makefile.inc \ - $(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/magic_wand/Makefile.inc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/memory_footprint/Makefile.inc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/micro_speech/Makefile.inc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/network_tester/Makefile.inc \ diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/targets/bluepill/bluepill.lds b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/targets/bluepill/bluepill.lds index 79a25642d..cfa9b39c2 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/targets/bluepill/bluepill.lds +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/targets/bluepill/bluepill.lds @@ -17,16 +17,20 @@ limitations under the License. https://github.com/google/stm32_bare_lib/blob/master/stm32_linker_layout.lds Modifications: - * increased the flash size to 1024K and RAM to 512K. This far exceeds the - actual hardware but enables running the tests in the emulator. + * increased the flash and RAM to 16MB (which far exceeds the actual + hardware) + +The primary purpose of using Renode in the TFLM repository is to be +able to run tests on a variety of models in simulation without being limited by +the constraints of the hardware. */ /* * 0x00000000 - 0x07ffffff - aliased to flash or sys memory depending on BOOT jumpers. - * 0x08000000 - 0x080fffff - Flash. + * 0x08000000 - 0x08ffffff - Flash. * 0x1ffff000 - 0x1ffff7ff - Boot firmware in system memory. * 0x1ffff800 - 0x1fffffff - Option bytes. - * 0x20000000 - 0x2007ffff - SRAM. + * 0x20000000 - 0x20ffffff - SRAM. * 0x40000000 - 0x40023400 - Peripherals */ @@ -34,8 +38,8 @@ limitations under the License. 
ENTRY(_main) MEMORY { -RAM (xrw) : ORIGIN = 0x20000000, LENGTH = 512K -FLASH (rx) : ORIGIN = 0x8000000, LENGTH = 1024K +RAM (xrw) : ORIGIN = 0x20000000, LENGTH = 16384K +FLASH (rx) : ORIGIN = 0x8000000, LENGTH = 16384K } /* Compute where the stack ends rather than hard coding it */ diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/targets/bluepill_makefile.inc b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/targets/bluepill_makefile.inc index fb86db7e0..c14bda496 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/targets/bluepill_makefile.inc +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/targets/bluepill_makefile.inc @@ -73,7 +73,6 @@ EXCLUDED_TESTS += $(TENSORFLOW_ROOT)tensorflow/lite/micro/flatbuffer_utils_test. MICROLITE_TEST_SRCS := $(filter-out $(EXCLUDED_TESTS), $(MICROLITE_TEST_SRCS)) EXCLUDED_EXAMPLE_TESTS := \ - $(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/magic_wand/Makefile.inc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/micro_speech/Makefile.inc MICRO_LITE_EXAMPLE_TESTS := $(filter-out $(EXCLUDED_EXAMPLE_TESTS), $(MICRO_LITE_EXAMPLE_TESTS)) diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/targets/cortex_m_corstone_300_makefile.inc b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/targets/cortex_m_corstone_300_makefile.inc index 873113830..0ffe5a319 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/targets/cortex_m_corstone_300_makefile.inc +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/targets/cortex_m_corstone_300_makefile.inc @@ -1,4 +1,4 @@ -# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -199,7 +199,6 @@ INCLUDES += \ EXCLUDED_TESTS := \ tensorflow/lite/micro/memory_arena_threshold_test.cc \ tensorflow/lite/micro/recording_micro_allocator_test.cc - MICROLITE_TEST_SRCS := $(filter-out $(EXCLUDED_TESTS), $(MICROLITE_TEST_SRCS)) TEST_SCRIPT := tensorflow/lite/micro/testing/test_with_arm_corstone_300.sh diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/targets/xtensa_makefile.inc b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/targets/xtensa_makefile.inc index 62af4b502..8d970c72b 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/targets/xtensa_makefile.inc +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/targets/xtensa_makefile.inc @@ -84,7 +84,6 @@ MICROLITE_TEST_SRCS := $(filter-out $(EXCLUDED_TESTS), $(MICROLITE_TEST_SRCS)) EXCLUDED_EXAMPLE_TESTS := \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/hello_world/Makefile.inc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/image_recognition_experimental/Makefile.inc \ - $(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/magic_wand/Makefile.inc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/network_tester/Makefile.inc MICRO_LITE_EXAMPLE_TESTS := $(filter-out $(EXCLUDED_EXAMPLE_TESTS), $(MICRO_LITE_EXAMPLE_TESTS)) MICRO_LITE_EXAMPLE_TESTS += $(shell find $(TENSORFLOW_ROOT)third_party/xtensa/examples/ -name Makefile.inc) diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/test_latency_log.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/test_latency_log.sh new file mode 100755 index 000000000..7079285aa --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/test_latency_log.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# This script is responsible for running the tests and also to log out the +# time (in seconds) it took to run the test file. It is using the linux time +# command to measure the latency. Setting the TIMEFORMAT to '%R' is providing +# us the real time latency. +# +# Called with following arguments: +# 1 - Name of the test file +# 2 - Name of the test script +# 3 - Name of the binary +# 4 - String output after all the tests are passed +# 5 - Name of the target +# The first parameter is used for logging purpose. The last four parameters are +# used to run the test. + +set -e + +TEST_FILE_NAME=${1} +TEST_SCRIPT=${2} +BINARY_NAME=${3} +TEST_PASS_STRING=${4} +TARGET_NAME=${5} + +# Output to stdout and stderr go to their normal places: +# Here we are opening 2 file descriptor, 3 and 4. FD 3 +# will redirect all the contents to stdout and 4 will +# redirect all the contents to stderr. 
Now when executing +# the TEST_SCRIPT command, we are redirecting all the stdout +# output of the command to FD 3 which will redirect everything +# to FD 1 (stdout) and all the stderr output of the command to +# FD 4 which will redirect everything to FD 2 (stderr). The +# output of the time command is captured in the time_log +# variable with the redirection of FD 2 (stderr) to FD 1 +# (stdout). Finally we are closing the FD 3 and 4.For more info +# https://stackoverflow.com/questions/4617489/get-values-from-time-command-via-bash-script +exec 3>&1 4>&2 +time_log=$( { TIMEFORMAT="%R"; time ${TEST_SCRIPT} ${BINARY_NAME} ${TEST_PASS_STRING} ${TARGET_NAME} 1>&3 2>&4; } 2>&1 ) # Captures time output only. +exec 3>&- 4>&- + +echo "Running ${TEST_FILE_NAME} took ${time_log} seconds" diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/project_generation/Makefile b/third_party/tflite-micro/tensorflow/lite/micro/tools/project_generation/Makefile index a0d462190..092ba7e07 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/project_generation/Makefile +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/project_generation/Makefile @@ -109,13 +109,13 @@ clean: libtflm: $(LIB) -hello_world: libtflm - @mkdir -p $(BINDIR) - $(CXX) $(CXXFLAGS) $(wildcard examples/hello_world/*.cc) $(INCLUDES) $(LIB) -o $(BINDIR)/$@ +HELLO_WORLD_SRCS := $(wildcard examples/hello_world/*.cc) +HELLO_WORLD_SRCS += $(wildcard examples/hello_world/models/*.cc) +HELLO_WORLD_INCLUDES := $(INCLUDES) -I./examples/hello_world -magic_wand: libtflm +hello_world: libtflm @mkdir -p $(BINDIR) - $(CXX) $(CXXFLAGS) $(wildcard examples/magic_wand/*.cc) $(INCLUDES) $(LIB) -o $(BINDIR)/$@ + $(CXX) $(CXXFLAGS) $(HELLO_WORLD_SRCS) $(HELLO_WORLD_INCLUDES) $(LIB) -o $(BINDIR)/$@ MICRO_SPEECH_SRCS := $(wildcard examples/micro_speech/*.cc) MICRO_SPEECH_SRCS += $(wildcard examples/micro_speech/*/*.cc) @@ -134,4 +134,4 @@ person_detection: libtflm @mkdir -p $(BINDIR) $(CXX) $(CXXFLAGS) $(PERSON_DETECTION_SRCS) $(PERSON_DETECTION_THIRD_PARTY_SRCS) $(PERSON_DETECTION_INCLUDES) $(LIB) -o $(BINDIR)/$@ -examples: hello_world magic_wand micro_speech person_detection +examples: hello_world micro_speech person_detection diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer.py b/third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer.py new file mode 100644 index 000000000..a77f97fb8 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer.py @@ -0,0 +1,222 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= +"""An experimental tool to requantize a int8 activation, int8 weight LSTM based model to int16 activation, int8 weight + +Steps: +1. Convert the trained model to int8 using the TFLite converter. 
See https://www.tensorflow.org/lite/performance/post_training_quantization#full_integer_quantization +2. Use this tool to requantize the int8 model to int16. +3. Check if the requantized model match the expectation (e.g., read the conversion printout, perform inference tests) + +The conversion process: +1. Requantize the ops specified in _COMPLEX_OP_REQUANTIZE_REGISTRATION using the registered function. Bias type conversion (int32 to int64) only happens here. +2. Requantize all non-constant tensors with int8 type to int16 (and fix the quantization parameters) + +Run: +bazel build tensorflow/lite/micro/tools:requantize_flatbuffer +bazel-bin/tensorflow/lite/micro/tools/requantize_flatbuffer --int8_model_path=".tflite file path"` --save_path="save path" + +CAVEAT: +1. Use this tool ONLY for models that contain the LSTM layer. All other models should use the standard tflite conversion process. +2. This is an experimental tool. ALWAYS check if the converted model matches your expectation +3. Add the custom op requantization function for complex ops (e.g., convolution). +4. We assume ops not in _COMPLEX_OP_REQUANTIZE_REGISTRATION only have activation tensors (i.e. no weights and bias). Check the quantized model performance if you add additional ops to _TESTED_SIMPLE_OPS + +""" +import os + +import numpy as np +from absl import app +from absl import flags +from absl import logging + +from tflite_micro.tensorflow.lite.tools import flatbuffer_utils +from tflite_micro.tensorflow.lite.micro.tools import requantize_flatbuffer_utils +from tflite_micro.tensorflow.lite.python import schema_py_generated + +FLAGS = flags.FLAGS + +flags.DEFINE_string("int8_model_path", + default=None, + help="the int8 model path.") +flags.DEFINE_string("save_path", + default=None, + help="path to save the requantized model.") + +# key: BuiltinOperator (see tensorflow/lite/schema/schema.fbs) +# Val: the requantize function defined in requantize_flatbuffer_utils.py +# FULLY_CONNECTED, CONV_2D, DEPTHWISE_CONV_2D share the same requantize function +# since they all share the same input/weight/bias configuration. 
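For context before the registration table that follows, here is a minimal usage sketch of the documented requantization flow, using the Requantizer class defined later in this file (the model paths are placeholders, not files shipped with the repo):

```python
from tflite_micro.tensorflow.lite.micro.tools import requantize_flatbuffer

# Hypothetical paths; any int8-activation LSTM model produced by the
# standard TFLite converter should work per the module docstring.
requantizer = requantize_flatbuffer.Requantizer.from_file(
    "/tmp/int8_lstm_model.tflite")
requantizer.requantize_8to16()
requantizer.save_model("/tmp/int16_lstm_model.tflite")
```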
+_COMPLEX_OP_REQUANTIZE_REGISTRATION = { + schema_py_generated.BuiltinOperator.FULLY_CONNECTED: + requantize_flatbuffer_utils.requantize_fully_connected, + schema_py_generated.BuiltinOperator.UNIDIRECTIONAL_SEQUENCE_LSTM: + requantize_flatbuffer_utils.requantize_unidirectional_sequence_lstm, + schema_py_generated.BuiltinOperator.SOFTMAX: + requantize_flatbuffer_utils.requantize_softmax, + schema_py_generated.BuiltinOperator.CONV_2D: + requantize_flatbuffer_utils.requantize_fully_connected, + schema_py_generated.BuiltinOperator.DEPTHWISE_CONV_2D: + requantize_flatbuffer_utils.requantize_fully_connected, + schema_py_generated.BuiltinOperator.TRANSPOSE_CONV: + requantize_flatbuffer_utils.requantize_transpose_conv, +} + +# List of tested simple operators (no weight and bias, e.g., reshape) see tensorflow/lite/schema/schema.fbs for op code names +_TESTED_SIMPLE_OPS = [ + schema_py_generated.BuiltinOperator.RESHAPE, + schema_py_generated.BuiltinOperator.QUANTIZE, + schema_py_generated.BuiltinOperator.DEQUANTIZE, + schema_py_generated.BuiltinOperator.MEAN, + schema_py_generated.BuiltinOperator.SQUARED_DIFFERENCE, + schema_py_generated.BuiltinOperator.ADD, + schema_py_generated.BuiltinOperator.RSQRT, + schema_py_generated.BuiltinOperator.MUL, + schema_py_generated.BuiltinOperator.SUB, + schema_py_generated.BuiltinOperator.LEAKY_RELU, + schema_py_generated.BuiltinOperator.LOGISTIC, + schema_py_generated.BuiltinOperator.PAD +] + +_SUPPORTED_OPS = set( + list(_COMPLEX_OP_REQUANTIZE_REGISTRATION.keys()) + _TESTED_SIMPLE_OPS) + + +class Requantizer: + """Requantize an int8 activation model to int16""" + + def __init__(self, int8_model): + """Initialize the int8 to int16 converter. + + Args: + int8_model: flatbuffer python object + """ + self.model = int8_model + self.remaining_tensors = set() + for subgraph in self.model.subgraphs: + for tensor in subgraph.tensors: + self.remaining_tensors.add(tensor) + + @classmethod + def from_file(self, model_path): + """Instantiates a converter from a int8 quantized .tflite filepath. + + Args: + model_path: Filepath to the .tflite model + + Returns: + An Int8ToInt16Converter instance + """ + int8_model = flatbuffer_utils.read_model(model_path) + return Requantizer(int8_model) + + @classmethod + def from_bytes(self, bytearray): + """Instantiates a converter from a int8 quantized .tflite bytearray. 
+
+    Args:
+      bytearray: Content of the .tflite model
+
+    Returns:
+      A Requantizer instance
+    """
+    int8_model = flatbuffer_utils.convert_bytearray_to_object(bytearray)
+    return cls(int8_model)
+
+  def _remove_tensor(self, tensor):
+    """Remove tensor from the tensor pool"""
+    if tensor in self.remaining_tensors:
+      self.remaining_tensors.remove(tensor)
+
+  def _remove_op_tensors(self, tensors, op):
+    """Remove an operator's tensors from the tensor pool
+
+    Args:
+      tensors: tensors in the subgraph
+      op: the operator
+    """
+    for tensor_id in op.inputs:
+      # -1 means the tensor is not used
+      if tensor_id != -1:
+        self._remove_tensor(tensors[tensor_id])
+    for tensor_id in op.outputs:
+      if tensor_id != -1:
+        self._remove_tensor(tensors[tensor_id])
+
+  def _convert_ops(self):
+    """Convert all ops registered in _COMPLEX_OP_REQUANTIZE_REGISTRATION from int8 to int16 (activation type)"""
+    op_codes = self.model.operatorCodes
+    for subgraph in self.model.subgraphs:
+      tensors = subgraph.tensors
+      for op in subgraph.operators:
+        op_code = op_codes[op.opcodeIndex].builtinCode
+        op_name = flatbuffer_utils.opcode_to_name(self.model, op.opcodeIndex)
+        if op_code not in _SUPPORTED_OPS:
+          raise RuntimeError(
+              f"Operator {op_name} is not supported. If the operator contains"
+              " weights/bias, develop and register the corresponding"
+              " requantize function in _COMPLEX_OP_REQUANTIZE_REGISTRATION."
+              " Otherwise, try adding the op code to _TESTED_SIMPLE_OPS and"
+              " validate the requantized model.")
+        if op_code in _COMPLEX_OP_REQUANTIZE_REGISTRATION:
+          logging.info(f"Convert operator {op_name}")
+          _COMPLEX_OP_REQUANTIZE_REGISTRATION[op_code](tensors,
+                                                       self.model.buffers, op)
+          self._remove_op_tensors(tensors, op)
+
+  def _change_tensor_activation_type(self):
+    """Change all remaining tensor types from int8 to int16"""
+    for subgraph in self.model.subgraphs:
+      for tensor in subgraph.tensors:
+        if ((tensor in self.remaining_tensors)
+            and (requantize_flatbuffer_utils.TENSOR_CODE_TYPE[tensor.type]
+                 == np.int8) and ("const" not in str(tensor.name))):
+          requantize_flatbuffer_utils.change_activation_tensor_8to16(
+              tensor, self.model.buffers)
+          self._remove_tensor(tensor)
+
+  def requantize_8to16(self):
+    '''
+    The requantize process has two phases:
+    1. Go through the registered ops and perform the custom op transformation
+    2. Go through the remaining tensors and convert int8 non-constant tensors to int16
+    '''
+
+    logging.info("Reset Operators")
+    self._convert_ops()
+    logging.info("Set Remaining Activation Types")
+    self._change_tensor_activation_type()
+    logging.info("Remaining Tensors:")
+    for tensor in self.remaining_tensors:
+      logging.info(
+          f"{tensor.name}, tensor type {flatbuffer_utils.type_to_name(tensor.type)}"
+      )
+
+  def save_model(self, output_path):
+    """Save the requantized model to a specified location."""
+    flatbuffer_utils.write_model(self.model, output_path)
+
+  def model_bytearray(self):
+    """Get the flatbuffer bytearray"""
+    return flatbuffer_utils.convert_object_to_bytearray(self.model)
+
+
+def main(_):
+  if not os.path.exists(FLAGS.int8_model_path):
+    raise ValueError(
+        "Model file does not exist. Please check the .tflite model path.")
+  requantizer = Requantizer.from_file(FLAGS.int8_model_path)
+  requantizer.requantize_8to16()
+  requantizer.save_model(FLAGS.save_path)
+
+
+if __name__ == "__main__":
+  app.run(main)
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer_test.py b/third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer_test.py
new file mode 100644
index 000000000..3dae5a8a2
--- /dev/null
+++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer_test.py
@@ -0,0 +1,115 @@
+# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+import os
+
+import numpy as np
+import tensorflow as tf
+
+from tensorflow.python.framework import test_util
+from tensorflow.python.platform import test
+from tflite_micro.tensorflow.lite.micro.tools import requantize_flatbuffer
+from tflite_micro.tensorflow.lite.micro.python.interpreter.src import tflm_runtime
+from tflite_micro.tensorflow.lite.tools import flatbuffer_utils
+
+
+# TODO(b/248061370): replace the keras model creation process with flatbuffer manipulation to speed up the test
+def create_simple_fc_model():
+  '''Create a simple model with two fully connected (FC) layers'''
+  model = tf.keras.models.Sequential([
+      tf.keras.layers.InputLayer(input_shape=(28, 28)),
+      tf.keras.layers.Flatten(),
+      tf.keras.layers.Dense(50, activation=tf.nn.relu),
+      tf.keras.layers.Dense(10, activation=tf.nn.softmax, name="output")
+  ])
+  fixed_input = tf.keras.layers.Input(shape=[28, 28],
+                                      batch_size=1,
+                                      dtype=model.inputs[0].dtype,
+                                      name="fixed_input")
+  fixed_output = model(fixed_input)
+  return tf.keras.models.Model(fixed_input, fixed_output)
+
+
+def representative_dataset_gen(num_samples=100):
+  np.random.seed(42)  # Seed the random number generator
+  for _ in range(num_samples):
+    yield [np.random.random((1, 28, 28)).astype(np.float32)]
+
+
+def convert_tfl_converter(keras_model,
+                          representative_dataset_gen,
+                          int16=False):
+  '''Convert and quantize the keras model using the standard tflite converter'''
+  converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
+  converter.optimizations = [tf.lite.Optimize.DEFAULT]
+  converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
+  if int16:
+    converter.target_spec.supported_ops = [
+        tf.lite.OpsSet.
+        EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
+    ]
+  converter.representative_dataset = representative_dataset_gen
+  return converter.convert()
+
+
+def convert_8to16_requantizer(keras_model, representative_dataset_gen):
+  '''Convert and quantize the keras model using the int8 to int16 conversion tool'''
+  # Convert to int8 first
+  int8_model = convert_tfl_converter(keras_model,
+                                     representative_dataset_gen,
+                                     int16=False)
+  int8_model = flatbuffer_utils.convert_bytearray_to_object(int8_model)
+  # Use the tool to convert to int16
+  requantizer = requantize_flatbuffer.Requantizer(int8_model)
+  requantizer.requantize_8to16()
+  return flatbuffer_utils.convert_object_to_bytearray(requantizer.model)
+
+
+class SimpleFCModelTest(test_util.TensorFlowTestCase):
+
+  def testCompareWithStandardConversion(self):
+
+    def inference(tflm_interpreter, data_x):
+      tflm_interpreter.set_input(data_x, 0)
+      tflm_interpreter.invoke()
+      return tflm_interpreter.get_output(0)
+
+    keras_model = create_simple_fc_model(
+    )  # int16 fc is supported in the tflite converter
+    tfl_converted_int16_model = convert_tfl_converter(
+        keras_model, representative_dataset_gen, int16=True)
+    int8_converted_int16_model = convert_8to16_requantizer(
+        keras_model, representative_dataset_gen)
+
+    interpreter_tfl_converted = tflm_runtime.Interpreter.from_bytes(
+        tfl_converted_int16_model)
+    interpreter_tool_converted = tflm_runtime.Interpreter.from_bytes(
+        int8_converted_int16_model)
+
+    num_steps = 10
+    # Give the same (random) input to both interpreters to confirm that the outputs are similar.
+    for _ in range(num_steps):
+      data_x = np.random.random((1, 28, 28)).astype("float32")
+
+      tfl_converted_result = inference(interpreter_tfl_converted, data_x)[0]
+      tool_converted_result = inference(interpreter_tool_converted, data_x)[0]
+
+      max_diff = max(abs(tool_converted_result - tfl_converted_result))
+      self.assertLess(
+          max_diff, 1e-4
+      )  # cannot be identical since the int8 model loses some range information
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer_utils.py b/third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer_utils.py
new file mode 100644
index 000000000..5709ff2cf
--- /dev/null
+++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer_utils.py
@@ -0,0 +1,325 @@
+# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+import numpy as np
+from absl import logging
+from tflite_micro.tensorflow.lite.python.schema_py_generated import TensorType
+
+# Map flatbuffer tensor type code to numpy data type; see the TensorType table
+# in tensorflow/lite/schema/schema.fbs
+# TODO(b/269487423): use a common util function instead
+TENSOR_CODE_TYPE = {
+    TensorType.FLOAT32: np.float32,
+    TensorType.FLOAT16: np.float16,
+    TensorType.INT32: np.int32,
+    TensorType.UINT8: np.uint8,
+    TensorType.INT64: np.int64,
+    TensorType.STRING: np.string_,
+    TensorType.BOOL: np.bool_,
+    TensorType.INT16: np.int16,
+    TensorType.COMPLEX64: np.complex64,
+    TensorType.INT8: np.int8,
+    TensorType.FLOAT64: np.float64,
+    TensorType.COMPLEX128: np.complex128,
+    TensorType.UINT64: np.uint64,
+    TensorType.RESOURCE: "RESOURCE",
+    TensorType.VARIANT: "VARIANT",
+    TensorType.UINT32: np.uint32,
+    TensorType.UINT16: np.uint16,
+    TensorType.INT4: "INT4",
+}
+
+# TODO(b/269487423): use a common util function instead
+TENSOR_TYPE_CODE = dict((reversed(item) for item in TENSOR_CODE_TYPE.items()))
+
+
+def clip_range(vals, bit_width):
+  """Mimic integer calculation.
+
+  Clip the range of vals based on bit width.
+
+  e.g., clip_range([300], 8) = [127] since int8 has range [-128, 127]
+
+  Args:
+    vals (np.array): float representation of the integer values
+    bit_width (int): number of desired bits for vals
+
+  Returns:
+    np.array : clipped vals
+  """
+  # Numpy integer calculation does not saturate, so clip explicitly here
+  min_val = -2**(bit_width - 1)
+  max_val = 2**(bit_width - 1) - 1
+  if vals.max() > max_val or vals.min() < min_val:
+    logging.info("WARNING: integer overflow!")
+  return np.clip(vals, min_val, max_val)
+
+
+def quantize_data(data, scale, zero_point=0, bit_width=8):
+  """Quantize the data to integer type with desired bit width.
+
+  The quantized data is represented using float since integer calculation in
+  numpy may differ from other implementations (e.g., no integer saturation
+  protection in numpy)
+
+  Args:
+    data (np.array): float data
+    scale (float): quantization scale of the data
+    zero_point (integer): quantization zero point of the data
+    bit_width (int): number of representative bits for vals
+
+  Returns:
+    np.array : quantized data in float but clipped range
+  """
+  vals = np.round(data / scale) + zero_point
+  return clip_range(vals, bit_width)
+
+
+def dequantize_data(quantized_data, scale, zero_point=0):
+  """Dequantize the integer data back to float.
+
+  Args:
+    quantized_data (np.array): quantized data
+    scale (float): quantization scale of the data
+    zero_point (integer): quantization zero point of the data
+
+  Returns:
+    np.array : dequantized data
+  """
+  return scale * (quantized_data - zero_point)
+
+
+def change_quantization_settings_8to16(tensor, buffers):
+  """Change the quantization settings of the tensor from int8 to int16"""
+
+  if tensor.quantization.quantizedDimension != 0:
+    raise RuntimeError(
+        "Only layer level quantization is supported. Per channel quantization"
+        " is not supported.")
+
+  scale = tensor.quantization.scale[0]
+  zero_point = tensor.quantization.zeroPoint[0]
+
+  # Set MAX_INT8 from 127 to 128 to compensate for the range precision loss
+  # due to int8 quantization
+  MIN_INT8, MAX_INT8 = -128, 128
+  # Narrow range (-min == max) is used for symmetric quantization
+  MIN_INT16, MAX_INT16 = -32767, 32767
+
+  # Asymmetric quantization: scale * (qmax - zero_point) = rmax
+  rmax = scale * (MAX_INT8 - zero_point)
+  rmin = scale * (MIN_INT8 - zero_point)
+  # Symmetric quantization: scale * qmax = rmax
+  scale_16 = max(abs(rmax), abs(rmin)) / abs(MIN_INT16)
+  # Change scale: symmetric quantization
+  tensor.quantization.scale = [scale_16]
+  tensor.quantization.zeroPoint = [0]
+
+  # Requantize the buffer data to int16 if necessary
+  tensor_buffer = buffers[tensor.buffer]
+  if tensor_buffer.data is not None:
+    expected_buffer_size = np.prod(tensor.shape)
+    data = np.frombuffer(tensor_buffer.data, dtype=np.int8)
+    # Different ops may share one buffer. No need to requantize the buffer
+    # if the buffer has already been processed to int16 (2 bytes)
+    if data.nbytes == expected_buffer_size * 2:
+      return
+    elif data.nbytes != expected_buffer_size:
+      raise RuntimeError(
+          f"Buffer size {data.nbytes} does not match the expected size {expected_buffer_size}"
+      )
+    # Dequantize with the original int8 parameters, then quantize with the new int16 scale
+    dequantized_data = dequantize_data(data, scale, zero_point)
+    int16_data = quantize_data(dequantized_data, scale_16, 0,
+                               16).astype(np.int16)
+    tensor_buffer.data = int16_data.tobytes()
+
+
+def change_activation_tensor_8to16(tensor, buffers):
+  """Change the quantization setting of an activation tensor from int8 to int16"""
+  if tensor.type == TENSOR_TYPE_CODE[np.int8]:
+    change_quantization_settings_8to16(tensor, buffers)
+    tensor.type = TENSOR_TYPE_CODE[np.int16]
+    logging.info(f"Set {tensor.name} from int8 to int16")
+
+
+def requantize_bias_perlayer(buffers, input, weight, bias):
+  """Requantize a layer-wise quantized bias tensor from int32 to int64"""
+  bias_buffer = buffers[bias.buffer]
+  bias_scale = bias.quantization.scale[0]
+  bias_zero_pt = bias.quantization.zeroPoint[0]
+  data = np.frombuffer(bias_buffer.data, dtype=np.int32)
+
+  # Change scale and zero point
+  bias_scale_int64 = (input.quantization.scale[0] *
+                      weight.quantization.scale[0])
+  bias_zero_pt_int64 = 0  # symmetrically quantized
+  bias.type = TENSOR_TYPE_CODE[np.int64]
+  bias.quantization.scale = [bias_scale_int64]
+  bias.quantization.zeroPoint = [bias_zero_pt_int64]
+
+  expected_buffer_size = bias.shape[0]  # bias has only one dimension
+  # Different ops may share one buffer. No need to requantize the buffer
+  # if the buffer has already been processed to int64 (8 bytes)
+  if data.nbytes == expected_buffer_size * 8:
+    return
+  elif data.nbytes != expected_buffer_size * 4:
+    raise RuntimeError(
+        f"Bias buffer size {data.nbytes} does not match the expected size {expected_buffer_size * 4}"
+    )
+  dequantized_data = dequantize_data(data, bias_scale, bias_zero_pt)
+  int64_data = quantize_data(dequantized_data, bias_scale_int64,
+                             bias_zero_pt_int64, 64).astype(np.int64)
+  bias_buffer.data = int64_data.tobytes()
+
+
+def requantize_bias_perchannel(buffers, input, weight, bias):
+  """Bias is channel-wise quantized. Requantize the bias channel by channel."""
+  bias_buffer = buffers[bias.buffer]
+  data = np.frombuffer(bias_buffer.data, dtype=np.int32)
+  expected_buffer_size = bias.shape[0]  # bias has only one dimension
+  # Whether to requantize the bias buffer; False if the buffer has already been requantized
+  requantize_buffer = True
+  # Different ops may share one buffer. No need to requantize the buffer
+  # if the buffer has already been processed to int64 (8 bytes)
+  if data.nbytes == expected_buffer_size * 8:
+    requantize_buffer = False
+  elif data.nbytes != expected_buffer_size * 4:
+    raise RuntimeError(
+        f"Bias buffer size {data.nbytes} does not match the expected size {expected_buffer_size * 4}"
+    )
+  if len(bias.quantization.scale) != len(weight.quantization.scale):
+    raise RuntimeError(
+        "Per channel quantization requires the number of bias scales"
+        f" ({len(bias.quantization.scale)}) to equal the number of weight"
+        f" scales ({len(weight.quantization.scale)})")
+  requantized_data = []
+  requantized_scales = []
+  requantized_zero_points = []
+  for element_data, bias_scale, weight_scale, bias_zero_point in zip(
+      data, bias.quantization.scale, weight.quantization.scale,
+      bias.quantization.zeroPoint):
+    bias_scale_int64 = (input.quantization.scale[0] * weight_scale)
+    bias_zero_pt_int64 = 0  # symmetrically quantized
+    requantized_scales.append(bias_scale_int64)
+    requantized_zero_points.append(bias_zero_pt_int64)
+
+    if requantize_buffer:
+      dequantized_data = dequantize_data(element_data, bias_scale,
+                                         bias_zero_point)
+      int64_data = quantize_data(dequantized_data, bias_scale_int64,
+                                 bias_zero_pt_int64, 64).astype(np.int64)
+      requantized_data.append(int64_data)
+
+  bias.type = TENSOR_TYPE_CODE[np.int64]
+  bias.quantization.scale = requantized_scales
+  bias.quantization.zeroPoint = requantized_zero_points
+  if requantize_buffer:
+    bias_buffer.data = np.array(requantized_data).tobytes()
+
+
+def set_bias_type_int64(buffers, input, weight, bias):
+  """Set the bias tensor quantization setting from int32 to int64
+
+  Args:
+    buffers (list): buffers for the model
+    input (Tensor): the corresponding input tensor for the bias
+    weight (Tensor): the corresponding weight tensor for the bias
+    bias (Tensor): the bias tensor that needs to be modified
+  """
+  if bias.type == TENSOR_TYPE_CODE[np.int32]:
+    if len(bias.quantization.scale) == 1:
+      requantize_bias_perlayer(buffers, input, weight, bias)
+    else:
+      requantize_bias_perchannel(buffers, input, weight, bias)
+
+
+def requantize_fully_connected(tensors, buffers, op):
+  """Requantize the fully connected op from int8 to int16.
+
+  Note: CONV_2D and DEPTHWISE_CONV_2D also use this requantize function since
+  they all share the same input/weight/bias configuration.
+  See tensorflow/lite/micro/kernels/fully_connected_common.cc
+  tflite_micro/tensorflow/lite/micro/kernels/depthwise_conv_common.cc
+  tflite_micro/tensorflow/lite/micro/kernels/conv_common.cc
+  """
+  # Indices are from tensorflow/lite/micro/kernels/fully_connected_common.cc
+  input_tensor = tensors[op.inputs[0]]
+  # Weights stay int8; no change needed
+  weight_tensor = tensors[op.inputs[1]]
+  output_tensor = tensors[op.outputs[0]]
+
+  change_activation_tensor_8to16(input_tensor, buffers)
+  change_activation_tensor_8to16(output_tensor, buffers)
+  # If the bias does not exist, op.inputs[2] == -1
+  if op.inputs[2] != -1:
+    bias_tensor = tensors[op.inputs[2]]
+    set_bias_type_int64(buffers, input_tensor, weight_tensor, bias_tensor)
+
+
+def requantize_unidirectional_sequence_lstm(tensors, buffers, op):
+  """Requantize the unidirectional sequence LSTM op from int8 to int16"""
+  input_tensor = tensors[op.inputs[0]]
+  hidden_state_tensor = tensors[op.inputs[18]]
+  output_tensor = tensors[op.outputs[0]]
+
+  # Indices are from tensorflow/lite/micro/kernels/lstm_shared.h
+  input_weights_idx = [1, 2, 3, 4]
+  recurrent_weights_idx = [5, 6, 7, 8]
+  bias_idx = [12, 13, 14, 15]
+
+  change_activation_tensor_8to16(input_tensor, buffers)
+  change_activation_tensor_8to16(hidden_state_tensor, buffers)
+  change_activation_tensor_8to16(output_tensor, buffers)
+
+  for weight_id, bias_id in zip(input_weights_idx, bias_idx):
+    weight_tensor = tensors[op.inputs[weight_id]]
+    bias_tensor = tensors[op.inputs[bias_id]]
+    set_bias_type_int64(buffers, input_tensor, weight_tensor, bias_tensor)
+
+  # Recurrent weights stay int8 and have no associated biases
+  for weight_id in recurrent_weights_idx:
+    weight_tensor = tensors[op.inputs[weight_id]]
+
+
+def requantize_softmax(tensors, buffers, op):
+  """Requantize the softmax op from int8 to int16"""
+  input_tensor = tensors[op.inputs[0]]
+  output_tensor = tensors[op.outputs[0]]
+
+  # Change input type
+  change_activation_tensor_8to16(input_tensor, buffers)
+
+  # Output range is always [0, 1]
+  if output_tensor.type == TENSOR_TYPE_CODE[np.int8]:
+    # Change quantization settings
+    output_tensor.quantization.scale = [1 / 32768]
+    output_tensor.quantization.zeroPoint = [0]
+    # Set tensor type
+    output_tensor.type = TENSOR_TYPE_CODE[np.int16]
+    logging.info(f"Set {output_tensor.name} from int8 to int16")
+
+
+def requantize_transpose_conv(tensors, buffers, op):
+  """Requantize the transpose conv op from int8 to int16"""
+  # Indices are from tensorflow/lite/micro/kernels/transpose_conv.cc
+  input_tensor = tensors[op.inputs[2]]
+  # Weights stay int8; no change needed
+  weight_tensor = tensors[op.inputs[1]]
+  output_tensor = tensors[op.outputs[0]]
+
+  change_activation_tensor_8to16(input_tensor, buffers)
+  change_activation_tensor_8to16(output_tensor, buffers)
+  # If the bias does not exist, the bias input is absent or op.inputs[3] == -1
+  if len(op.inputs) > 3:
+    if op.inputs[3] != -1:
+      bias_tensor = tensors[op.inputs[3]]
+      set_bias_type_int64(buffers, input_tensor, weight_tensor, bias_tensor)
\ No newline at end of file
diff --git a/third_party/tflite-micro/tensorflow/lite/schema/schema_generated.h b/third_party/tflite-micro/tensorflow/lite/schema/schema_generated.h
index 266ac1405..b64b54be9 100755
--- a/third_party/tflite-micro/tensorflow/lite/schema/schema_generated.h
+++ b/third_party/tflite-micro/tensorflow/lite/schema/schema_generated.h
@@ -543,6 +543,18 @@ struct SignOptions;
 struct SignOptionsBuilder;
 struct SignOptionsT;
 
+struct BitcastOptions;
+struct BitcastOptionsBuilder;
+struct BitcastOptionsT;
+
+struct BitwiseXorOptions; +struct BitwiseXorOptionsBuilder; +struct BitwiseXorOptionsT; + +struct RightShiftOptions; +struct RightShiftOptionsBuilder; +struct RightShiftOptionsT; + struct OperatorCode; struct OperatorCodeBuilder; struct OperatorCodeT; @@ -1059,11 +1071,14 @@ enum BuiltinOperator : int32_t { BuiltinOperator_ATAN2 = 156, BuiltinOperator_UNSORTED_SEGMENT_MIN = 157, BuiltinOperator_SIGN = 158, + BuiltinOperator_BITCAST = 159, + BuiltinOperator_BITWISE_XOR = 160, + BuiltinOperator_RIGHT_SHIFT = 161, BuiltinOperator_MIN = BuiltinOperator_ADD, - BuiltinOperator_MAX = BuiltinOperator_SIGN + BuiltinOperator_MAX = BuiltinOperator_RIGHT_SHIFT }; -inline const BuiltinOperator (&EnumValuesBuiltinOperator())[159] { +inline const BuiltinOperator (&EnumValuesBuiltinOperator())[162] { static const BuiltinOperator values[] = { BuiltinOperator_ADD, BuiltinOperator_AVERAGE_POOL_2D, @@ -1223,13 +1238,16 @@ inline const BuiltinOperator (&EnumValuesBuiltinOperator())[159] { BuiltinOperator_UNSORTED_SEGMENT_SUM, BuiltinOperator_ATAN2, BuiltinOperator_UNSORTED_SEGMENT_MIN, - BuiltinOperator_SIGN + BuiltinOperator_SIGN, + BuiltinOperator_BITCAST, + BuiltinOperator_BITWISE_XOR, + BuiltinOperator_RIGHT_SHIFT }; return values; } inline const char * const *EnumNamesBuiltinOperator() { - static const char * const names[160] = { + static const char * const names[163] = { "ADD", "AVERAGE_POOL_2D", "CONCATENATION", @@ -1389,13 +1407,16 @@ inline const char * const *EnumNamesBuiltinOperator() { "ATAN2", "UNSORTED_SEGMENT_MIN", "SIGN", + "BITCAST", + "BITWISE_XOR", + "RIGHT_SHIFT", nullptr }; return names; } inline const char *EnumNameBuiltinOperator(BuiltinOperator e) { - if (flatbuffers::IsOutRange(e, BuiltinOperator_ADD, BuiltinOperator_SIGN)) return ""; + if (flatbuffers::IsOutRange(e, BuiltinOperator_ADD, BuiltinOperator_RIGHT_SHIFT)) return ""; const size_t index = static_cast(e); return EnumNamesBuiltinOperator()[index]; } @@ -1525,11 +1546,14 @@ enum BuiltinOptions : uint8_t { BuiltinOptions_UnsortedSegmentSumOptions = 121, BuiltinOptions_ATan2Options = 122, BuiltinOptions_SignOptions = 123, + BuiltinOptions_BitcastOptions = 124, + BuiltinOptions_BitwiseXorOptions = 125, + BuiltinOptions_RightShiftOptions = 126, BuiltinOptions_MIN = BuiltinOptions_NONE, - BuiltinOptions_MAX = BuiltinOptions_SignOptions + BuiltinOptions_MAX = BuiltinOptions_RightShiftOptions }; -inline const BuiltinOptions (&EnumValuesBuiltinOptions())[124] { +inline const BuiltinOptions (&EnumValuesBuiltinOptions())[127] { static const BuiltinOptions values[] = { BuiltinOptions_NONE, BuiltinOptions_Conv2DOptions, @@ -1654,13 +1678,16 @@ inline const BuiltinOptions (&EnumValuesBuiltinOptions())[124] { BuiltinOptions_UnsortedSegmentMinOptions, BuiltinOptions_UnsortedSegmentSumOptions, BuiltinOptions_ATan2Options, - BuiltinOptions_SignOptions + BuiltinOptions_SignOptions, + BuiltinOptions_BitcastOptions, + BuiltinOptions_BitwiseXorOptions, + BuiltinOptions_RightShiftOptions }; return values; } inline const char * const *EnumNamesBuiltinOptions() { - static const char * const names[125] = { + static const char * const names[128] = { "NONE", "Conv2DOptions", "DepthwiseConv2DOptions", @@ -1785,13 +1812,16 @@ inline const char * const *EnumNamesBuiltinOptions() { "UnsortedSegmentSumOptions", "ATan2Options", "SignOptions", + "BitcastOptions", + "BitwiseXorOptions", + "RightShiftOptions", nullptr }; return names; } inline const char *EnumNameBuiltinOptions(BuiltinOptions e) { - if (flatbuffers::IsOutRange(e, BuiltinOptions_NONE, 
BuiltinOptions_SignOptions)) return ""; + if (flatbuffers::IsOutRange(e, BuiltinOptions_NONE, BuiltinOptions_RightShiftOptions)) return ""; const size_t index = static_cast(e); return EnumNamesBuiltinOptions()[index]; } @@ -2292,6 +2322,18 @@ template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_SignOptions; }; +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_BitcastOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_BitwiseXorOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_RightShiftOptions; +}; + template struct BuiltinOptionsUnionTraits { static const BuiltinOptions enum_value = BuiltinOptions_NONE; }; @@ -2788,6 +2830,18 @@ template<> struct BuiltinOptionsUnionTraits { static const BuiltinOptions enum_value = BuiltinOptions_SignOptions; }; +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_BitcastOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_BitwiseXorOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_RightShiftOptions; +}; + struct BuiltinOptionsUnion { BuiltinOptions type; void *value; @@ -3802,6 +3856,30 @@ struct BuiltinOptionsUnion { return type == BuiltinOptions_SignOptions ? reinterpret_cast(value) : nullptr; } + tflite::BitcastOptionsT *AsBitcastOptions() { + return type == BuiltinOptions_BitcastOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::BitcastOptionsT *AsBitcastOptions() const { + return type == BuiltinOptions_BitcastOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::BitwiseXorOptionsT *AsBitwiseXorOptions() { + return type == BuiltinOptions_BitwiseXorOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::BitwiseXorOptionsT *AsBitwiseXorOptions() const { + return type == BuiltinOptions_BitwiseXorOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::RightShiftOptionsT *AsRightShiftOptions() { + return type == BuiltinOptions_RightShiftOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::RightShiftOptionsT *AsRightShiftOptions() const { + return type == BuiltinOptions_RightShiftOptions ? 
+ reinterpret_cast(value) : nullptr; + } }; bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type); @@ -11556,6 +11634,123 @@ inline flatbuffers::Offset CreateSignOptions( flatbuffers::Offset CreateSignOptions(flatbuffers::FlatBufferBuilder &_fbb, const SignOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +struct BitcastOptionsT : public flatbuffers::NativeTable { + typedef BitcastOptions TableType; +}; + +struct BitcastOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef BitcastOptionsT NativeTableType; + typedef BitcastOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + BitcastOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(BitcastOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const BitcastOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct BitcastOptionsBuilder { + typedef BitcastOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit BitcastOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateBitcastOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + BitcastOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateBitcastOptions(flatbuffers::FlatBufferBuilder &_fbb, const BitcastOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct BitwiseXorOptionsT : public flatbuffers::NativeTable { + typedef BitwiseXorOptions TableType; +}; + +struct BitwiseXorOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef BitwiseXorOptionsT NativeTableType; + typedef BitwiseXorOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + BitwiseXorOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(BitwiseXorOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const BitwiseXorOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct BitwiseXorOptionsBuilder { + typedef BitwiseXorOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit BitwiseXorOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateBitwiseXorOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + BitwiseXorOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateBitwiseXorOptions(flatbuffers::FlatBufferBuilder &_fbb, const BitwiseXorOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct RightShiftOptionsT : public flatbuffers::NativeTable { + typedef RightShiftOptions TableType; +}; + +struct RightShiftOptions FLATBUFFERS_FINAL_CLASS : private 
flatbuffers::Table { + typedef RightShiftOptionsT NativeTableType; + typedef RightShiftOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + RightShiftOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(RightShiftOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const RightShiftOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct RightShiftOptionsBuilder { + typedef RightShiftOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit RightShiftOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateRightShiftOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + RightShiftOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateRightShiftOptions(flatbuffers::FlatBufferBuilder &_fbb, const RightShiftOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + struct OperatorCodeT : public flatbuffers::NativeTable { typedef OperatorCode TableType; int8_t deprecated_builtin_code = 0; @@ -12068,6 +12263,15 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { const tflite::SignOptions *builtin_options_as_SignOptions() const { return builtin_options_type() == tflite::BuiltinOptions_SignOptions ? static_cast(builtin_options()) : nullptr; } + const tflite::BitcastOptions *builtin_options_as_BitcastOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_BitcastOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::BitwiseXorOptions *builtin_options_as_BitwiseXorOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_BitwiseXorOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::RightShiftOptions *builtin_options_as_RightShiftOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_RightShiftOptions ? 
static_cast(builtin_options()) : nullptr; + } const flatbuffers::Vector *custom_options() const { return GetPointer *>(VT_CUSTOM_OPTIONS); } @@ -12596,6 +12800,18 @@ template<> inline const tflite::SignOptions *Operator::builtin_options_as inline const tflite::BitcastOptions *Operator::builtin_options_as() const { + return builtin_options_as_BitcastOptions(); +} + +template<> inline const tflite::BitwiseXorOptions *Operator::builtin_options_as() const { + return builtin_options_as_BitwiseXorOptions(); +} + +template<> inline const tflite::RightShiftOptions *Operator::builtin_options_as() const { + return builtin_options_as_RightShiftOptions(); +} + struct OperatorBuilder { typedef Operator Table; flatbuffers::FlatBufferBuilder &fbb_; @@ -16931,6 +17147,75 @@ inline flatbuffers::Offset CreateSignOptions(flatbuffers::FlatBuffe _fbb); } +inline BitcastOptionsT *BitcastOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new BitcastOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void BitcastOptions::UnPackTo(BitcastOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset BitcastOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BitcastOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateBitcastOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateBitcastOptions(flatbuffers::FlatBufferBuilder &_fbb, const BitcastOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const BitcastOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateBitcastOptions( + _fbb); +} + +inline BitwiseXorOptionsT *BitwiseXorOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new BitwiseXorOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void BitwiseXorOptions::UnPackTo(BitwiseXorOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset BitwiseXorOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BitwiseXorOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateBitwiseXorOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateBitwiseXorOptions(flatbuffers::FlatBufferBuilder &_fbb, const BitwiseXorOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const BitwiseXorOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateBitwiseXorOptions( + _fbb); +} + +inline RightShiftOptionsT *RightShiftOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new RightShiftOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void RightShiftOptions::UnPackTo(RightShiftOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset RightShiftOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const RightShiftOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateRightShiftOptions(_fbb, 
_o, _rehasher); +} + +inline flatbuffers::Offset CreateRightShiftOptions(flatbuffers::FlatBufferBuilder &_fbb, const RightShiftOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const RightShiftOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateRightShiftOptions( + _fbb); +} + inline OperatorCodeT *OperatorCode::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = std::unique_ptr(new OperatorCodeT()); UnPackTo(_o.get(), _resolver); @@ -17966,6 +18251,18 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob auto ptr = reinterpret_cast(obj); return verifier.VerifyTable(ptr); } + case BuiltinOptions_BitcastOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_BitwiseXorOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_RightShiftOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } default: return true; } } @@ -18477,6 +18774,18 @@ inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, c auto ptr = reinterpret_cast(obj); return ptr->UnPack(resolver); } + case BuiltinOptions_BitcastOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_BitwiseXorOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_RightShiftOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } default: return nullptr; } } @@ -18976,6 +19285,18 @@ inline flatbuffers::Offset BuiltinOptionsUnion::Pack(flatbuffers::FlatBuff auto ptr = reinterpret_cast(value); return CreateSignOptions(_fbb, ptr, _rehasher).Union(); } + case BuiltinOptions_BitcastOptions: { + auto ptr = reinterpret_cast(value); + return CreateBitcastOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_BitwiseXorOptions: { + auto ptr = reinterpret_cast(value); + return CreateBitwiseXorOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_RightShiftOptions: { + auto ptr = reinterpret_cast(value); + return CreateRightShiftOptions(_fbb, ptr, _rehasher).Union(); + } default: return 0; } } @@ -19474,6 +19795,18 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) : value = new tflite::SignOptionsT(*reinterpret_cast(u.value)); break; } + case BuiltinOptions_BitcastOptions: { + value = new tflite::BitcastOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_BitwiseXorOptions: { + value = new tflite::BitwiseXorOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_RightShiftOptions: { + value = new tflite::RightShiftOptionsT(*reinterpret_cast(u.value)); + break; + } default: break; } @@ -20096,6 +20429,21 @@ inline void BuiltinOptionsUnion::Reset() { delete ptr; break; } + case BuiltinOptions_BitcastOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_BitwiseXorOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_RightShiftOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } default: break; } value = nullptr;