diff --git a/conf/tflite-micro.version b/conf/tflite-micro.version index 9874f722c..a7b3835fb 100644 --- a/conf/tflite-micro.version +++ b/conf/tflite-micro.version @@ -1 +1 @@ -8746ec9 +9eb4fb1 diff --git a/third_party/tflite-micro/tensorflow/lite/core/c/common.cc b/third_party/tflite-micro/tensorflow/lite/core/c/common.cc index 827312b45..602d9e078 100644 --- a/third_party/tflite-micro/tensorflow/lite/core/c/common.cc +++ b/third_party/tflite-micro/tensorflow/lite/core/c/common.cc @@ -219,11 +219,11 @@ TfLiteStatus TfLiteTensorCopy(const TfLiteTensor* src, TfLiteTensor* dst) { return kTfLiteOk; } -void TfLiteTensorResizeMaybeCopy(size_t num_bytes, TfLiteTensor* tensor, - bool preserve_data) { +TfLiteStatus TfLiteTensorResizeMaybeCopy(size_t num_bytes, TfLiteTensor* tensor, + bool preserve_data) { if (tensor->allocation_type != kTfLiteDynamic && tensor->allocation_type != kTfLitePersistentRo) { - return; + return kTfLiteOk; } #ifdef TF_LITE_TENSORFLOW_PROFILER tflite::PauseHeapMonitoring(/*pause=*/true); @@ -258,9 +258,15 @@ void TfLiteTensorResizeMaybeCopy(size_t num_bytes, TfLiteTensor* tensor, tflite::PauseHeapMonitoring(/*pause=*/false); #endif tensor->bytes = num_bytes; + if (tensor->data.data == nullptr && num_bytes != 0) { + // We are done allocating but tensor is pointing to null and a valid size + // was requested, so we error. + return kTfLiteError; + } + return kTfLiteOk; } -void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor) { +TfLiteStatus TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor) { return TfLiteTensorResizeMaybeCopy(num_bytes, tensor, true); } #endif // TF_LITE_STATIC_MEMORY @@ -331,4 +337,18 @@ void TfLiteOpaqueDelegateDelete(TfLiteOpaqueDelegate* opaque_delegate) { delete tflite_delegate; } +void* TfLiteOpaqueDelegateGetData(const TfLiteOpaqueDelegate* delegate) { + if (!delegate) return nullptr; + + // The following cast is safe only because this code is part of the + // TF Lite runtime implementation. Apps using TF Lite should not rely on + // 'TfLiteOpaqueDelegate' and 'TfLiteDelegate' being equivalent. + const auto* tflite_delegate = + reinterpret_cast(delegate); + + if (!tflite_delegate->opaque_delegate_builder) return nullptr; + + return tflite_delegate->opaque_delegate_builder->data; +} + } // extern "C" diff --git a/third_party/tflite-micro/tensorflow/lite/core/c/common.h b/third_party/tflite-micro/tensorflow/lite/core/c/common.h index 46d5e650a..cf96a1163 100644 --- a/third_party/tflite-micro/tensorflow/lite/core/c/common.h +++ b/third_party/tflite-micro/tensorflow/lite/core/c/common.h @@ -42,6 +42,7 @@ limitations under the License. #ifndef TENSORFLOW_LITE_CORE_C_COMMON_H_ #define TENSORFLOW_LITE_CORE_C_COMMON_H_ +#include #include #include #include @@ -179,10 +180,23 @@ void TfLiteFloatArrayFree(TfLiteFloatArray* a); (context)->ReportError((context), __VA_ARGS__); \ } \ } while (false) +#define TF_LITE_OPAQUE_KERNEL_LOG(opaque_context, ...) \ + do { \ + TfLiteOpaqueContextReportError((opaque_context), __VA_ARGS__); \ + } while (false) +#define TF_LITE_OPAQUE_MAYBE_KERNEL_LOG(opaque_context, ...) \ + do { \ + if ((opaque_context) != nullptr) { \ + TfLiteOpaqueContextReportError((opaque_context), __VA_ARGS__); \ + } \ + } while (false) #else // TF_LITE_STRIP_ERROR_STRINGS #define ARGS_UNUSED(...) (void)sizeof(#__VA_ARGS__) #define TF_LITE_KERNEL_LOG(context, ...) ARGS_UNUSED(__VA_ARGS__) #define TF_LITE_MAYBE_KERNEL_LOG(context, ...) ARGS_UNUSED(__VA_ARGS__) +#define TF_LITE_OPAQUE_KERNEL_LOG(opaque_context, ...) 
ARGS_UNUSED(__VA_ARGS__) +#define TF_LITE_OPAQUE_MAYBE_KERNEL_LOG(opaque_context, ...) \ + ARGS_UNUSED(__VA_ARGS__) #endif // TF_LITE_STRIP_ERROR_STRINGS // Check whether value is true, and if not return kTfLiteError from @@ -195,6 +209,15 @@ void TfLiteFloatArrayFree(TfLiteFloatArray* a); } \ } while (0) +// Check whether value is true, and if not return kTfLiteError from +// the current function (and report the error string msg). +#define TF_LITE_OPAQUE_ENSURE_MSG(opaque_context, value, msg) \ + do { \ + if (!(value)) { \ + TF_LITE_OPAQUE_KERNEL_LOG((opaque_context), __FILE__ " " msg); \ + return kTfLiteError; \ + } \ + } while (0) // Check whether the value `a` is true, and if not return kTfLiteError from // the current function, while also reporting the location of the error. #define TF_LITE_ENSURE(context, a) \ @@ -206,6 +229,16 @@ void TfLiteFloatArrayFree(TfLiteFloatArray* a); } \ } while (0) +// Check whether the value `a` is true, and if not return kTfLiteError from +// the current function, while also reporting the location of the error. +#define TF_LITE_OPAQUE_ENSURE(opaque_context, a) \ + do { \ + if (!(a)) { \ + TF_LITE_OPAQUE_KERNEL_LOG(opaque_context, "%s:%d: %s was not true.", \ + __FILE__, __LINE__, #a); \ + return kTfLiteError; \ + } \ + } while (0) #define TF_LITE_ENSURE_STATUS(a) \ do { \ const TfLiteStatus s = (a); \ @@ -228,6 +261,19 @@ void TfLiteFloatArrayFree(TfLiteFloatArray* a); } \ } while (0) +// Check whether the value `a == b` is true, and if not return kTfLiteError from +// the current function, while also reporting the location of the error. +// `a` and `b` may be evaluated more than once, so no side effects or +// extremely expensive computations should be done. +// NOTE: Use TF_LITE_ENSURE_TYPES_EQ if comparing TfLiteTypes. +#define TF_LITE_OPAQUE_ENSURE_EQ(opaque_context, a, b) \ + do { \ + if ((a) != (b)) { \ + TF_LITE_OPAQUE_KERNEL_LOG((opaque_context), "%s:%d %s != %s (%d != %d)", \ + __FILE__, __LINE__, #a, #b, (a), (b)); \ + return kTfLiteError; \ + } \ + } while (0) #define TF_LITE_ENSURE_TYPES_EQ(context, a, b) \ do { \ if ((a) != (b)) { \ @@ -238,6 +284,15 @@ void TfLiteFloatArrayFree(TfLiteFloatArray* a); } \ } while (0) +#define TF_LITE_OPAQUE_ENSURE_TYPES_EQ(opaque_context, a, b) \ + do { \ + if ((a) != (b)) { \ + TF_LITE_OPAQUE_KERNEL_LOG((opaque_context), "%s:%d %s != %s (%s != %s)", \ + __FILE__, __LINE__, #a, #b, \ + TfLiteTypeGetName(a), TfLiteTypeGetName(b)); \ + return kTfLiteError; \ + } \ + } while (0) #define TF_LITE_ENSURE_NEAR(context, a, b, epsilon) \ do { \ auto delta = ((a) > (b)) ? ((a) - (b)) : ((b) - (a)); \ @@ -249,6 +304,16 @@ void TfLiteFloatArrayFree(TfLiteFloatArray* a); } \ } while (0) +#define TF_LITE_OPAQUE_ENSURE_NEAR(opaque_context, a, b, epsilon) \ + do { \ + auto delta = ((a) > (b)) ? ((a) - (b)) : ((b) - (a)); \ + if (delta > epsilon) { \ + TF_LITE_OPAQUE_KERNEL_LOG( \ + (opaque_context), "%s:%d %s not near %s (%f != %f)", __FILE__, \ + __LINE__, #a, #b, static_cast(a), static_cast(b)); \ + return kTfLiteError; \ + } \ + } while (0) #define TF_LITE_ENSURE_OK(context, status) \ do { \ const TfLiteStatus s = (status); \ @@ -648,23 +713,26 @@ void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims, TfLiteStatus TfLiteTensorCopy(const TfLiteTensor* src, TfLiteTensor* dst); // Change the size of the memory block owned by `tensor` to `num_bytes`. -// Tensors with allocation types other than kTfLiteDynamic will be ignored. 
+// Tensors with allocation types other than `kTfLiteDynamic` will be ignored and +// a kTfLiteOk will be returned. // `tensor`'s internal data buffer will be assigned a pointer // which can safely be passed to free or realloc if `num_bytes` is zero. -// Behaviour is undefined if `tensor` is NULL. // If `preserve_data` is true, tensor data will be unchanged in the range from -// the start of the region up to the minimum of the old and new sizes. -void TfLiteTensorResizeMaybeCopy(size_t num_bytes, TfLiteTensor* tensor, - bool preserve_data); +// the start of the region up to the minimum of the old and new sizes. In the +// case of NULL tensor, or an error allocating new memory, returns +// `kTfLiteError`. +TfLiteStatus TfLiteTensorResizeMaybeCopy(size_t num_bytes, TfLiteTensor* tensor, + bool preserve_data); // Change the size of the memory block owned by `tensor` to `num_bytes`. -// Tensors with allocation types other than kTfLiteDynamic will be ignored. +// Tensors with allocation types other than kTfLiteDynamic will be ignored and +// a kTfLiteOk will be returned. // `tensor`'s internal data buffer will be assigned a pointer // which can safely be passed to free or realloc if `num_bytes` is zero. -// Behaviour is undefined if `tensor` is NULL. // Tensor data will be unchanged in the range from the start of the region up to -// the minimum of the old and new sizes. -void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor); +// the minimum of the old and new sizes. In the case +// of NULL tensor, or an error allocating new memory, returns `kTfLiteError`. +TfLiteStatus TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor); #endif // TF_LITE_STATIC_MEMORY // WARNING: This is an experimental interface that is subject to change. @@ -1135,6 +1203,17 @@ TfLiteOpaqueDelegate* TfLiteOpaqueDelegateCreate( // 'delegate' is a null pointer. void TfLiteOpaqueDelegateDelete(TfLiteOpaqueDelegate* delegate); +// Returns a pointer to the data associated with the provided opaque 'delegate'. +// +// A null pointer will be returned when: +// - The 'delegate' is null. +// - The 'data' field of the 'TfLiteOpaqueDelegateBuilder' used to construct the +// 'delegate' was null. +// - Or in case of any other error. +// - The 'delegate' has been constructed via a 'TfLiteOpaqueDelegateBuilder', +// but the 'data' field of the 'TfLiteOpaqueDelegateBuilder' is null. +void* TfLiteOpaqueDelegateGetData(const TfLiteOpaqueDelegate* delegate); + #ifdef __cplusplus } // extern "C" #endif // __cplusplus diff --git a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h index 77c766d25..cc881a3af 100644 --- a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h +++ b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h @@ -29,14 +29,15 @@ namespace reference_integer_ops { // zero_point (params.weights_offset) is always 0. // However, for per-tensor functions, params.weights_offset is still applied for // backward compatibility. 
-
-inline void FullyConnectedPerChannel(
+template <typename InputType, typename WeightType, typename BiasType,
+          typename OutputType>
+void FullyConnectedPerChannel(
     const FullyConnectedParams& params, const int32_t* output_multiplier,
     const int* output_shift, const RuntimeShape& input_shape,
-    const int8_t* input_data, const RuntimeShape& filter_shape,
-    const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const int32_t* bias_data, const RuntimeShape& output_shape,
-    int8_t* output_data) {
+    const InputType* input_data, const RuntimeShape& filter_shape,
+    const WeightType* filter_data, const RuntimeShape& bias_shape,
+    const BiasType* bias_data, const RuntimeShape& output_shape,
+    OutputType* output_data) {
   const int32_t input_offset = params.input_offset;
   const int32_t output_offset = params.output_offset;
   const int32_t output_activation_min = params.quantized_activation_min;
@@ -52,7 +53,7 @@ inline void FullyConnectedPerChannel(
   const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
   for (int b = 0; b < batches; ++b) {
     for (int out_c = 0; out_c < output_depth; ++out_c) {
-      int32_t acc = 0;
+      BiasType acc = 0;
       for (int d = 0; d < accum_depth; ++d) {
         int32_t input_val = input_data[b * accum_depth + d];
         int32_t filter_val = filter_data[out_c * accum_depth + d];
@@ -61,62 +62,26 @@ inline void FullyConnectedPerChannel(
       if (bias_data) {
         acc += bias_data[out_c];
       }
-      acc = MultiplyByQuantizedMultiplier(acc, output_multiplier[out_c],
-                                          output_shift[out_c]);
-      acc += output_offset;
-      acc = std::max(acc, output_activation_min);
-      acc = std::min(acc, output_activation_max);
-      output_data[out_c + output_depth * b] = static_cast<int8_t>(acc);
-    }
-  }
-}
-
-template <typename AccumScalar>
-inline void FullyConnectedPerChannel(
-    const FullyConnectedParams& params, const int32_t* output_multiplier,
-    const int* output_shift, const RuntimeShape& input_shape,
-    const int16_t* input_data, const RuntimeShape& filter_shape,
-    const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const AccumScalar* bias_data, const RuntimeShape& output_shape,
-    int16_t* output_data) {
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
-  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
-
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  const int filter_dim_count = filter_shape.DimensionsCount();
-  const int output_dim_count = output_shape.DimensionsCount();
-  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
-  const int output_depth = output_shape.Dims(output_dim_count - 1);
-  TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
-  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
-  for (int b = 0; b < batches; ++b) {
-    for (int out_c = 0; out_c < output_depth; ++out_c) {
-      AccumScalar acc = 0;
-      for (int d = 0; d < accum_depth; ++d) {
-        int32_t input_val = input_data[b * accum_depth + d];
-        int32_t filter_val = filter_data[out_c * accum_depth + d];
-        acc += filter_val * input_val;
-      }
-      if (bias_data) {
-        acc += bias_data[out_c];
-      }
       int32_t acc_scaled = MultiplyByQuantizedMultiplier(
           acc, output_multiplier[out_c], output_shift[out_c]);
+      acc_scaled += output_offset;
       acc_scaled = std::max(acc_scaled, output_activation_min);
       acc_scaled = std::min(acc_scaled, output_activation_max);
-      output_data[out_c + output_depth * b] = static_cast<int16_t>(acc_scaled);
+      output_data[out_c + output_depth * b] =
+          static_cast<OutputType>(acc_scaled);
     }
   }
 }
 
-inline void FullyConnected(
-    const FullyConnectedParams& params,
const RuntimeShape& input_shape, - const int8_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, const RuntimeShape& bias_shape, - const int32_t* bias_data, const RuntimeShape& output_shape, - int8_t* output_data) { +template +void FullyConnected(const FullyConnectedParams& params, + const RuntimeShape& input_shape, + const InputType* input_data, + const RuntimeShape& filter_shape, + const WeightType* filter_data, + const RuntimeShape& bias_shape, const BiasType* bias_data, + const RuntimeShape& output_shape, OutputType* output_data) { const int32_t input_offset = params.input_offset; const int32_t filter_offset = params.weights_offset; const int32_t output_offset = params.output_offset; @@ -136,7 +101,7 @@ inline void FullyConnected( const int accum_depth = filter_shape.Dims(filter_dim_count - 1); for (int b = 0; b < batches; ++b) { for (int out_c = 0; out_c < output_depth; ++out_c) { - int32_t acc = 0; + BiasType acc = 0; for (int d = 0; d < accum_depth; ++d) { int32_t input_val = input_data[b * accum_depth + d]; int32_t filter_val = filter_data[out_c * accum_depth + d]; @@ -145,11 +110,13 @@ inline void FullyConnected( if (bias_data) { acc += bias_data[out_c]; } - acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); - acc += output_offset; - acc = std::max(acc, output_activation_min); - acc = std::min(acc, output_activation_max); - output_data[out_c + output_depth * b] = static_cast(acc); + int32_t acc_scaled = + MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); + acc_scaled += output_offset; + acc_scaled = std::max(acc_scaled, output_activation_min); + acc_scaled = std::min(acc_scaled, output_activation_max); + output_data[out_c + output_depth * b] = + static_cast(acc_scaled); } } } @@ -168,48 +135,6 @@ inline void FullyConnectedWithPackedInt4Weights( output_data); } -template -inline void FullyConnected( - const FullyConnectedParams& params, const RuntimeShape& input_shape, - const int16_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, const RuntimeShape& bias_shape, - const AccumScalar* bias_data, const RuntimeShape& output_shape, - int16_t* output_data) { - const int32_t filter_offset = params.weights_offset; - const int32_t output_multiplier = params.output_multiplier; - const int output_shift = params.output_shift; - const int32_t output_activation_min = params.quantized_activation_min; - const int32_t output_activation_max = params.quantized_activation_max; - TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2); - TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1); - - TFLITE_DCHECK_LE(output_activation_min, output_activation_max); - const int filter_dim_count = filter_shape.DimensionsCount(); - const int output_dim_count = output_shape.DimensionsCount(); - const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1); - const int output_depth = output_shape.Dims(output_dim_count - 1); - TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2)); - const int accum_depth = filter_shape.Dims(filter_dim_count - 1); - for (int b = 0; b < batches; ++b) { - for (int out_c = 0; out_c < output_depth; ++out_c) { - AccumScalar acc = 0; - for (int d = 0; d < accum_depth; ++d) { - int32_t input_val = input_data[b * accum_depth + d]; - int32_t filter_val = filter_data[out_c * accum_depth + d]; - acc += (filter_val + filter_offset) * input_val; - } - if (bias_data) { - acc += bias_data[out_c]; - } - int32_t acc_scaled = - MultiplyByQuantizedMultiplier(acc, output_multiplier, 
output_shift); - acc_scaled = std::max(acc_scaled, output_activation_min); - acc_scaled = std::min(acc_scaled, output_activation_max); - output_data[out_c + output_depth * b] = static_cast(acc_scaled); - } - } -} - } // namespace reference_integer_ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h index 22e897409..05066184c 100644 --- a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h +++ b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h @@ -24,10 +24,10 @@ limitations under the License. namespace tflite { namespace reference_integer_ops { -template -inline void MulElementwise(int size, const ArithmeticParams& params, - const T* input1_data, const T* input2_data, - T* output_data) { +template +void MulElementwise(int size, const ArithmeticParams& params, + const InputType* input1_data, const InputType* input2_data, + OutputType* output_data) { for (int i = 0; i < size; ++i) { const int32_t input1_val = params.input1_offset + input1_data[i]; const int32_t input2_val = params.input2_offset + input2_data[i]; @@ -39,7 +39,7 @@ inline void MulElementwise(int size, const ArithmeticParams& params, const int32_t clamped_output = std::min(params.quantized_activation_max, std::max(params.quantized_activation_min, unclamped_result)); - output_data[i] = static_cast(clamped_output); + output_data[i] = static_cast(clamped_output); } } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval_test.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval_test.h new file mode 100644 index 000000000..9481b09c3 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval_test.h @@ -0,0 +1,690 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_TEST_COMMOM_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_TEST_COMMOM_H_ +#include + +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" +#include "tensorflow/lite/kernels/internal/quantization_util.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/lstm_eval.h" +#include "tensorflow/lite/micro/kernels/lstm_shared.h" +#include "tensorflow/lite/micro/kernels/testdata/lstm_test_data.h" +#include "tensorflow/lite/micro/test_helpers.h" +#include "tensorflow/lite/micro/testing/micro_test.h" + +namespace tflite { +namespace testing { + +// IntegerLstmParameter is required by the legend int8 code. 
Not required for +// the generalized standard LSTM (e.g., 16bits activation case) +template +IntegerLstmParameter CreateIntegerParameter( + const LstmNodeContents& + quantized_node_contents) { + IntegerLstmParameter evaluation_params; + double effective_scale; + int buffer_shift_output; + + const auto quantization_settings = + quantized_node_contents.QuantizationSettings(); + effective_scale = quantization_settings.input.scale * + quantization_settings.forget_gate.activation_weight.scale / + quantization_settings.nonlinear_activation_input_scale; + QuantizeMultiplier(effective_scale, + &evaluation_params.effective_input_to_forget_scale_a, + &buffer_shift_output); + evaluation_params.effective_input_to_forget_scale_b = buffer_shift_output; + effective_scale = quantization_settings.output.scale * + quantization_settings.forget_gate.recurrent_weight.scale / + quantization_settings.nonlinear_activation_input_scale; + QuantizeMultiplier(effective_scale, + &evaluation_params.effective_recurrent_to_forget_scale_a, + &buffer_shift_output); + evaluation_params.effective_recurrent_to_forget_scale_b = buffer_shift_output; + // Set effective bias + evaluation_params.input_to_forget_effective_bias = const_cast( + quantized_node_contents.ForgetGateData().activation_zp_folded_bias); + evaluation_params.recurrent_to_forget_effective_bias = const_cast( + quantized_node_contents.ForgetGateData().recurrent_zp_folded_bias); + + // input gate + effective_scale = quantization_settings.input.scale * + quantization_settings.input_gate.activation_weight.scale / + quantization_settings.nonlinear_activation_input_scale; + QuantizeMultiplier(effective_scale, + &evaluation_params.effective_input_to_input_scale_a, + &buffer_shift_output); + evaluation_params.effective_input_to_input_scale_b = buffer_shift_output; + effective_scale = quantization_settings.output.scale * + quantization_settings.input_gate.recurrent_weight.scale / + quantization_settings.nonlinear_activation_input_scale; + QuantizeMultiplier(effective_scale, + &evaluation_params.effective_recurrent_to_input_scale_a, + &buffer_shift_output); + evaluation_params.effective_recurrent_to_input_scale_b = buffer_shift_output; + // Set effective bias + evaluation_params.input_to_input_effective_bias = const_cast( + quantized_node_contents.InputGateData().activation_zp_folded_bias); + evaluation_params.recurrent_to_input_effective_bias = const_cast( + quantized_node_contents.InputGateData().recurrent_zp_folded_bias); + + // cell gate + effective_scale = quantization_settings.input.scale * + quantization_settings.cell_gate.activation_weight.scale / + quantization_settings.nonlinear_activation_input_scale; + QuantizeMultiplier(effective_scale, + &evaluation_params.effective_input_to_cell_scale_a, + &buffer_shift_output); + evaluation_params.effective_input_to_cell_scale_b = buffer_shift_output; + effective_scale = quantization_settings.output.scale * + quantization_settings.cell_gate.recurrent_weight.scale / + quantization_settings.nonlinear_activation_input_scale; + QuantizeMultiplier(effective_scale, + &evaluation_params.effective_recurrent_to_cell_scale_a, + &buffer_shift_output); + evaluation_params.effective_recurrent_to_cell_scale_b = buffer_shift_output; + // Set effective bias + evaluation_params.input_to_cell_effective_bias = const_cast( + quantized_node_contents.CellGateData().activation_zp_folded_bias); + evaluation_params.recurrent_to_cell_effective_bias = const_cast( + quantized_node_contents.CellGateData().recurrent_zp_folded_bias); + + // output gate 
+ effective_scale = quantization_settings.input.scale * + quantization_settings.output_gate.activation_weight.scale / + quantization_settings.nonlinear_activation_input_scale; + QuantizeMultiplier(effective_scale, + &evaluation_params.effective_input_to_output_scale_a, + &buffer_shift_output); + evaluation_params.effective_input_to_output_scale_b = buffer_shift_output; + effective_scale = quantization_settings.output.scale * + quantization_settings.output_gate.recurrent_weight.scale / + quantization_settings.nonlinear_activation_input_scale; + QuantizeMultiplier(effective_scale, + &evaluation_params.effective_recurrent_to_output_scale_a, + &buffer_shift_output); + evaluation_params.effective_recurrent_to_output_scale_b = buffer_shift_output; + // Set effective bias + evaluation_params.input_to_output_effective_bias = const_cast( + quantized_node_contents.OutputGateData().activation_zp_folded_bias); + evaluation_params.recurrent_to_output_effective_bias = const_cast( + quantized_node_contents.OutputGateData().recurrent_zp_folded_bias); + + // hidden state (no projection, output is the hidden state) + effective_scale = quantization_settings.nonlinear_activation_output_scale * + quantization_settings.nonlinear_activation_output_scale / + quantization_settings.hidden_state.scale; + QuantizeMultiplier(effective_scale, + &evaluation_params.effective_hidden_scale_a, + &buffer_shift_output); + evaluation_params.effective_hidden_scale_b = buffer_shift_output; + evaluation_params.hidden_zp = quantization_settings.hidden_state.zero_point; + + // cell state. Note, cell_scale is actually not a scale. 2^-cell_scale is + // the true scale for cell + int buffer_cell_scale; + tflite::CheckedLog2(quantization_settings.cell_state.scale, + &buffer_cell_scale); + evaluation_params.cell_scale = buffer_cell_scale; + + evaluation_params.quantized_cell_clip = static_cast(std::min( + std::max( + static_cast(quantized_node_contents.BuiltinData().cell_clip) / + quantization_settings.cell_state.scale, + -32768.0), + 32767.0)); + return evaluation_params; +} + +/*TEST HELPER FUNCTIONS*/ +template +void ValidateResultGoldens(const T* golden, const T* output_data, + const int output_len, const float tolerance) { + for (int i = 0; i < output_len; ++i) { + TF_LITE_MICRO_EXPECT_NEAR(golden[i], output_data[i], tolerance); + } +} + +template +void TestGateOutputFloat( + const GateData& gate_params, + const TfLiteFusedActivation activation_type, const float* input_data, + const float* hidden_state, const float* expected_vals, + const float tolerance) { + float gate_output[batch_size * state_dimension] = {}; + tflite::lstm_internal::CalculateLstmGateFloat( + input_data, gate_params.activation_weight, + /*aux_input=*/nullptr, /*aux_input_to_gate_weights*/ nullptr, + hidden_state, gate_params.recurrent_weight, + /*cell_state=*/nullptr, /*cell_to_gate_weights=*/nullptr, + /*layer_norm_coefficients=*/nullptr, gate_params.fused_bias, batch_size, + input_dimension, input_dimension, state_dimension, state_dimension, + /*activation=*/activation_type, gate_output, + /*is_input_all_zeros=*/false, + /*is_aux_input_all_zeros=*/true); + ValidateResultGoldens(expected_vals, gate_output, + batch_size * state_dimension, tolerance); +} + +template +void TestGateOutputQuantized( + const ActivationType* quantized_input, + const ActivationType* quantized_hidden_state, + const GateData& + gate_params, + const NodeQuantizationParameters& quantization_settings, + int32_t effective_input_to_gate_scale_a, + int32_t effective_input_to_gate_scale_b, + 
int32_t effective_recurrent_to_gate_scale_a, + int32_t effective_recurrent_to_gate_scale_b, + TfLiteFusedActivation nonlinear_type, const float* expected_vals, + float tolerance) { + CellType gate_output[batch_size * state_dimension] = {}; + BiasType scratch_buffer[batch_size * state_dimension] = {}; + + tflite::lstm_internal::CalculateLstmGateInteger8x8_16( + // Input and weights + quantized_input, gate_params.activation_weight, + gate_params.activation_zp_folded_bias, effective_input_to_gate_scale_a, + effective_input_to_gate_scale_b, + // Output state and weights + quantized_hidden_state, gate_params.activation_weight, + gate_params.recurrent_zp_folded_bias, effective_recurrent_to_gate_scale_a, + effective_recurrent_to_gate_scale_b, + // Cell state and weights + nullptr, nullptr, 0, 0, + // Layer normalization parameters (layer norm LSTM) + nullptr, nullptr, 0, 0, 0, + // Array sizes + batch_size, input_dimension, state_dimension, state_dimension, + nonlinear_type, + // Output + gate_output, + // Parameters for performance optimizations + // Scratch arrays + scratch_buffer); + + float gate_output_float[batch_size * state_dimension] = {}; + Dequantize(gate_output, batch_size * state_dimension, + quantization_settings.nonlinear_activation_output_scale, 0, + gate_output_float); + + ValidateResultGoldens(expected_vals, gate_output_float, + batch_size * state_dimension, tolerance); +} + +template +void TestCellUpdateFloat( + const GateOutputCheckData& gate_output_data, + const float cell_clip, const float tolerance) { + // copy the data since it will be updated + float cell_state[batch_size * state_dimension] = {}; + std::memcpy(cell_state, gate_output_data.cell_state, + batch_size * state_dimension * sizeof(float)); + + float forget_gate[batch_size * state_dimension] = {}; + std::memcpy(forget_gate, gate_output_data.expected_forget_gate_output, + batch_size * state_dimension * sizeof(float)); + + tflite::lstm_internal::UpdateLstmCellFloat( + batch_size, state_dimension, cell_state, + gate_output_data.expected_input_gate_output, forget_gate, + gate_output_data.expected_cell_gate_output, + /*use_cifg=*/false, cell_clip); + + ValidateResultGoldens(gate_output_data.expected_updated_cell, cell_state, + batch_size * state_dimension, tolerance); +} + +template +void TestCellUpdateQuantized( + const GateOutputCheckData& gate_output_data, + const NodeQuantizationParameters& quantization_settings, + const int32_t cell_scale_shift, const CellType quantized_cell_clip, + const float tolerance) { + CellType quantized_cell_state[batch_size * state_dimension] = {}; + tflite::Quantize(gate_output_data.cell_state, quantized_cell_state, + batch_size * state_dimension, + quantization_settings.cell_state.scale, + quantization_settings.cell_state.zero_point); + + CellType quantized_forget_gate[batch_size * state_dimension] = {}; + tflite::Quantize(gate_output_data.expected_forget_gate_output, + quantized_forget_gate, batch_size * state_dimension, + quantization_settings.nonlinear_activation_output_scale, 0); + + CellType quantized_input_gate[batch_size * state_dimension] = {}; + tflite::Quantize(gate_output_data.expected_input_gate_output, + quantized_input_gate, batch_size * state_dimension, + quantization_settings.nonlinear_activation_output_scale, 0); + + CellType quantized_cell_gate[batch_size * state_dimension] = {}; + tflite::Quantize(gate_output_data.expected_cell_gate_output, + quantized_cell_gate, batch_size * state_dimension, + quantization_settings.nonlinear_activation_output_scale, 0); + + 
tflite::lstm_internal::UpdateLstmCellInteger( + batch_size, state_dimension, quantized_cell_state, cell_scale_shift, + quantized_input_gate, quantized_forget_gate, quantized_cell_gate, false, + quantized_cell_clip); + + float cell_state_float[batch_size * state_dimension] = {}; + Dequantize(quantized_cell_state, batch_size * state_dimension, + quantization_settings.cell_state.scale, + quantization_settings.cell_state.zero_point, cell_state_float); + + ValidateResultGoldens(gate_output_data.expected_updated_cell, + cell_state_float, batch_size * state_dimension, + tolerance); +} + +template +void TestHiddenStateUpdateFloat( + const GateOutputCheckData& gate_output_data, + const float tolerance) { + // If no projection layer, hidden state dimension == output dimension == + // cell state dimension + float output[batch_size * state_dimension] = {}; + float scratch[batch_size * state_dimension] = {}; + + tflite::lstm_internal::CalculateLstmOutputFloat( + batch_size, state_dimension, state_dimension, + gate_output_data.expected_updated_cell, + gate_output_data.expected_output_gate_output, kTfLiteActTanh, nullptr, + nullptr, 0, output, scratch); + + ValidateResultGoldens(gate_output_data.expected_updated_hidden, output, + batch_size * state_dimension, tolerance); +} + +template +void TestHiddenStateUpdateQuantized( + const GateOutputCheckData& gate_output_data, + const NodeQuantizationParameters& quantization_settings, + const IntegerLstmParameter& evaluation_params, const float tolerance) { + CellType quantized_cell_state[batch_size * state_dimension] = {}; + tflite::Quantize(gate_output_data.expected_updated_cell, quantized_cell_state, + batch_size * state_dimension, + quantization_settings.cell_state.scale, + quantization_settings.cell_state.zero_point); + + CellType quantized_output_gate[batch_size * state_dimension] = {}; + tflite::Quantize(gate_output_data.expected_output_gate_output, + quantized_output_gate, batch_size * state_dimension, + quantization_settings.nonlinear_activation_output_scale, 0); + + // scratches + int16_t scratch0[batch_size * state_dimension] = {}; + int8_t scratch1[batch_size * state_dimension] = {}; + int32_t scratch2[batch_size * state_dimension] = {}; + + // output (updated hidden state) + int8_t output_state[batch_size * state_dimension] = {}; + + tflite::lstm_internal::CalculateLstmOutputInteger8x8_16( + batch_size, state_dimension, state_dimension, quantized_cell_state, + evaluation_params.cell_scale, quantized_output_gate, + evaluation_params.effective_hidden_scale_a, + evaluation_params.effective_hidden_scale_b, evaluation_params.hidden_zp, + /*projection_weights=*/nullptr, /*proj_scale_a=*/0, 0, 0, + /*output_state_zp=*/evaluation_params.hidden_zp, + evaluation_params.quantized_proj_clip, output_state, scratch0, scratch1, + scratch2); + + float output_state_float[batch_size * state_dimension] = {}; + Dequantize(output_state, batch_size * state_dimension, + quantization_settings.hidden_state.scale, + quantization_settings.hidden_state.zero_point, output_state_float); + + ValidateResultGoldens(gate_output_data.expected_updated_hidden, + output_state_float, batch_size * state_dimension, + tolerance); +} + +template +void TestOneStepLSTMFloat( + const TfLiteLSTMParams& general_model_settings, + /*can not be const, state will be updated*/ + LstmNodeContents& node_contents, + const GateOutputCheckData& gate_output_data, + const float tolerance) { + // scratch buffers + float forget_gate_scratch[batch_size * state_dimension] = {}; + float 
input_gate_scratch[batch_size * state_dimension] = {}; + float cell_gate_scratch[batch_size * state_dimension] = {}; + float output_gate_scratch[batch_size * state_dimension] = {}; + + // states and output will be modified (cannot use the const getter) + float* hidden_state = node_contents.GetHiddenStateData(); + float* cell_state = node_contents.GetCellStateData(); + float* output = node_contents.GetOutputData(); + + tflite::lstm_internal::LstmStepFloat( + gate_output_data.input_data, + node_contents.InputGateData().activation_weight, + node_contents.ForgetGateData().activation_weight, + node_contents.CellGateData().activation_weight, + node_contents.OutputGateData().activation_weight, + /*aux_input_ptr=*/nullptr, /*aux_input_to_input_weights_ptr=*/nullptr, + /*aux_input_to_forget_weights_ptr=*/nullptr, + /*aux_input_to_cell_weights_ptr=*/nullptr, + /*aux_input_to_output_weights_ptr=*/nullptr, + node_contents.InputGateData().recurrent_weight, + node_contents.ForgetGateData().recurrent_weight, + node_contents.CellGateData().recurrent_weight, + node_contents.OutputGateData().recurrent_weight, + /*cell_to_input_weights_ptr=*/nullptr, + /*cell_to_forget_weights_ptr=*/nullptr, + /*cell_to_output_weights_ptr=*/nullptr, + /*input_layer_norm_coefficients_ptr=*/nullptr, + /*forget_layer_norm_coefficients_ptr=*/nullptr, + /*cell_layer_norm_coefficients_ptr=*/nullptr, + /*output_layer_norm_coefficients_ptr=*/nullptr, + node_contents.InputGateData().fused_bias, + node_contents.ForgetGateData().fused_bias, + node_contents.CellGateData().fused_bias, + node_contents.OutputGateData().fused_bias, + /*projection_weights_ptr=*/nullptr, /*projection_bias_ptr=*/nullptr, + &general_model_settings, batch_size, state_dimension, input_dimension, + input_dimension, state_dimension, + /*output_batch_leading_dim=*/0, hidden_state, cell_state, + input_gate_scratch, forget_gate_scratch, cell_gate_scratch, + output_gate_scratch, output); + + ValidateResultGoldens(gate_output_data.expected_updated_hidden, hidden_state, + batch_size * state_dimension, tolerance); + ValidateResultGoldens(gate_output_data.expected_updated_cell, cell_state, + batch_size * state_dimension, tolerance); +} + +template +void TestOneStepLSTMQuantized( + /*can not be const, state will be updated*/ + LstmNodeContents& + model_contents, + const GateOutputCheckData& gate_output_data, + const float hidden_state_tolerance, const float cell_state_tolerance) { + // Scratch buffers + CellType scratch0[batch_size * state_dimension] = {}; + CellType scratch1[batch_size * state_dimension] = {}; + CellType scratch2[batch_size * state_dimension] = {}; + CellType scratch3[batch_size * state_dimension] = {}; + ActivationType scratch4[batch_size * state_dimension] = {}; + BiasType scratch5[batch_size * state_dimension] = {}; + + // states and output will be modified (cannot use the const getter) + ActivationType* hidden_state = model_contents.GetHiddenStateData(); + CellType* cell_state = model_contents.GetCellStateData(); + ActivationType* output = model_contents.GetOutputData(); + + const auto evaluation_params = + tflite::testing::CreateIntegerParameter(model_contents); + const auto quantization_settings = model_contents.QuantizationSettings(); + + tflite::lstm_internal::LstmStepInteger8x8_16( + model_contents.GetInputData(), + model_contents.InputGateData().activation_weight, + evaluation_params.effective_input_to_input_scale_a, + evaluation_params.effective_input_to_input_scale_b, + model_contents.ForgetGateData().activation_weight, + 
evaluation_params.effective_input_to_forget_scale_a, + evaluation_params.effective_input_to_forget_scale_b, + model_contents.CellGateData().activation_weight, + evaluation_params.effective_input_to_cell_scale_a, + evaluation_params.effective_input_to_cell_scale_b, + model_contents.OutputGateData().activation_weight, + evaluation_params.effective_input_to_output_scale_a, + evaluation_params.effective_input_to_output_scale_b, + model_contents.InputGateData().recurrent_weight, + evaluation_params.effective_recurrent_to_input_scale_a, + evaluation_params.effective_recurrent_to_input_scale_b, + model_contents.ForgetGateData().recurrent_weight, + evaluation_params.effective_recurrent_to_forget_scale_a, + evaluation_params.effective_recurrent_to_forget_scale_b, + model_contents.CellGateData().recurrent_weight, + evaluation_params.effective_recurrent_to_cell_scale_a, + evaluation_params.effective_recurrent_to_cell_scale_b, + model_contents.OutputGateData().recurrent_weight, + evaluation_params.effective_recurrent_to_output_scale_a, + evaluation_params.effective_recurrent_to_output_scale_b, + /*cell_to_input_weight_ptr=*/nullptr, + /*effective_cell_to_input_scale_a=*/0, + /*effective_cell_to_input_scale_b=*/0, + /*cell_to_forget_weight_ptr=*/nullptr, + /*effective_cell_to_forget_scale_a=*/0, + /*effective_cell_to_forget_scale_b=*/0, + /*cell_to_output_weight_ptr=*/nullptr, + /*effective_cell_to_output_scale_a=*/0, + /*effective_cell_to_output_scale_b=*/0, + /*projection_weight_ptr=*/nullptr, /*effective_proj_scale_a=*/0, + /*effective_proj_scale_b=*/0, evaluation_params.hidden_zp, + evaluation_params.effective_hidden_scale_a, + evaluation_params.effective_hidden_scale_b, + /*layer_norm_input_weight_ptr=*/nullptr, + /*layer_norm_input_scale_a=*/0, /*layer_norm_input_scale_b=*/0, + /*layer_norm_forget_weight_ptr=*/nullptr, + /*layer_norm_forget_scale_a=*/0, /*layer_norm_forget_scale_b=*/0, + /*layer_norm_cell_weight_ptr=*/nullptr, + /*layer_norm_cell_scale_a=*/0, /*layer_norm_cell_scale_b=*/0, + /*layer_norm_output_weight_ptr=*/nullptr, + /*layer_norm_output_scale_a=*/0, /*layer_norm_output_scale_b=*/0, + /*input_gate_bias_ptr=*/nullptr, /*forget_gate_bias_ptr=*/nullptr, + /*cell_gate_bias_ptr=*/nullptr, /*output_gate_bias_ptr=*/nullptr, + evaluation_params.quantized_cell_clip, + evaluation_params.quantized_proj_clip, evaluation_params.cell_scale, + /*input_variance_guard=*/0, /*forget_variance_guard=*/0, + /*cell_variance_guard=*/0, /*output_variance_guard=*/0, + evaluation_params.input_to_forget_effective_bias, + evaluation_params.recurrent_to_forget_effective_bias, + evaluation_params.input_to_cell_effective_bias, + evaluation_params.recurrent_to_cell_effective_bias, + evaluation_params.input_to_output_effective_bias, + evaluation_params.recurrent_to_output_effective_bias, + evaluation_params.input_to_input_effective_bias, + evaluation_params.recurrent_to_input_effective_bias, + evaluation_params.projection_effective_bias, batch_size, input_dimension, + state_dimension, state_dimension, hidden_state, + quantization_settings.output.zero_point, cell_state, output, scratch0, + scratch1, scratch2, scratch3, scratch4, scratch5); + + float dequantized_hidden_state[batch_size * state_dimension] = {}; + Dequantize(hidden_state, batch_size * state_dimension, + quantization_settings.hidden_state.scale, + quantization_settings.hidden_state.zero_point, + dequantized_hidden_state); + + float dequantized_cell_state[batch_size * state_dimension] = {}; + Dequantize(cell_state, batch_size * state_dimension, + 
quantization_settings.cell_state.scale, + quantization_settings.cell_state.zero_point, + dequantized_cell_state); + + ValidateResultGoldens(gate_output_data.expected_updated_hidden, + dequantized_hidden_state, batch_size * state_dimension, + hidden_state_tolerance); + ValidateResultGoldens(gate_output_data.expected_updated_cell, + dequantized_cell_state, batch_size * state_dimension, + cell_state_tolerance); +} + +template +void TestLSTMEvalFloat( + /*can not be const, state will be updated*/ + LstmNodeContents& float_model_contents, + const LstmEvalCheckData< + batch_size * time_steps * input_dimension, batch_size * state_dimension, + batch_size * state_dimension * time_steps>& eval_check_data, + const float tolerance) { + float scratch_buffers[4 * batch_size * state_dimension] = {}; + auto general_model_settings = float_model_contents.BuiltinData(); + tflite::EvalFloatLstm( + float_model_contents.GetEvalTensor(kLstmInputTensor), + float_model_contents.GetEvalTensor(kLstmInputToInputWeightsTensor), + float_model_contents.GetEvalTensor(kLstmInputToForgetWeightsTensor), + float_model_contents.GetEvalTensor(kLstmInputToCellWeightsTensor), + float_model_contents.GetEvalTensor(kLstmInputToOutputWeightsTensor), + float_model_contents.GetEvalTensor(kLstmRecurrentToInputWeightsTensor), + float_model_contents.GetEvalTensor(kLstmRecurrentToForgetWeightsTensor), + float_model_contents.GetEvalTensor(kLstmRecurrentToCellWeightsTensor), + float_model_contents.GetEvalTensor(kLstmRecurrentToOutputWeightsTensor), + /*cell_to_input_weights=*/nullptr, + /*cell_to_forget_weights=*/nullptr, + /*cell_to_output_weights=*/nullptr, + /*input_layer_norm_coefficients=*/nullptr, + /*forget_layer_norm_coefficients=*/nullptr, + /*cell_layer_norm_coefficients=*/nullptr, + /*output_layer_norm_coefficients=*/nullptr, + /*aux_input=*/nullptr, + /*aux_input_to_input_weights=*/nullptr, + /*aux_input_to_forget_weights=*/nullptr, + /*aux_input_to_cell_weights=*/nullptr, + /*aux_input_to_output_weights=*/nullptr, + float_model_contents.GetEvalTensor(kLstmInputGateBiasTensor), + float_model_contents.GetEvalTensor(kLstmForgetGateBiasTensor), + float_model_contents.GetEvalTensor(kLstmCellGateBiasTensor), + float_model_contents.GetEvalTensor(kLstmOutputGateBiasTensor), + /*projection_weights=*/nullptr, + /*projection_bias=*/nullptr, &general_model_settings, + /*forward_sequence=*/true, /*time_major=*/false, + /*output_offset=*/0, scratch_buffers, + float_model_contents.HiddenStateEvalTensor(), + float_model_contents.CellStateEvalTensor(), + float_model_contents.OutputEvalTensor()); + + // Validate hidden state. See previous test for the calculation + ValidateResultGoldens(eval_check_data.expected_hidden_state, + float_model_contents.GetHiddenStateData(), + batch_size * state_dimension, tolerance); + // Validate cell state. See previous test for the calculation + ValidateResultGoldens(eval_check_data.expected_cell_state, + float_model_contents.GetCellStateData(), + batch_size * state_dimension, tolerance); + // Validate output . 
See previous test for the calculation + ValidateResultGoldens(eval_check_data.expected_output, + float_model_contents.GetOutputData(), + batch_size * state_dimension * time_steps, tolerance); +} + +template +void TestLSTMEvalQuantized( + /*can not be const, state will be updated*/ + LstmNodeContents& + quantized_model_content, + const LstmEvalCheckData< + batch_size * time_steps * input_dimension, batch_size * state_dimension, + batch_size * state_dimension * time_steps>& eval_check_data, + const float hidden_state_tolerance, const float cell_state_tolerance) { + // Scratch buffers + CellType scratch0[batch_size * state_dimension] = {}; + CellType scratch1[batch_size * state_dimension] = {}; + CellType scratch2[batch_size * state_dimension] = {}; + CellType scratch3[batch_size * state_dimension] = {}; + ActivationType scratch4[batch_size * state_dimension * time_steps] = {}; + BiasType scratch5[batch_size * state_dimension] = {}; + + const auto quantization_settings = + quantized_model_content.QuantizationSettings(); + const auto evaluation_params = + tflite::testing::CreateIntegerParameter(quantized_model_content); + const auto general_model_settings = quantized_model_content.BuiltinData(); + + EvalInteger8x8_16Lstm( + quantized_model_content.GetEvalTensor(kLstmInputTensor), + quantized_model_content.GetEvalTensor(kLstmInputToInputWeightsTensor), + quantized_model_content.GetEvalTensor(kLstmInputToForgetWeightsTensor), + quantized_model_content.GetEvalTensor(kLstmInputToCellWeightsTensor), + quantized_model_content.GetEvalTensor(kLstmInputToOutputWeightsTensor), + quantized_model_content.GetEvalTensor(kLstmRecurrentToInputWeightsTensor), + quantized_model_content.GetEvalTensor( + kLstmRecurrentToForgetWeightsTensor), + quantized_model_content.GetEvalTensor(kLstmRecurrentToCellWeightsTensor), + quantized_model_content.GetEvalTensor( + kLstmRecurrentToOutputWeightsTensor), + /*cell_to_input_weights=*/nullptr, + /*cell_to_forget_weights=*/nullptr, + /*cell_to_output_weights=*/nullptr, + /*input_layer_norm_coefficients=*/nullptr, + /*forget_layer_norm_coefficients=*/nullptr, + /*cell_layer_norm_coefficients=*/nullptr, + /*output_layer_norm_coefficients=*/nullptr, + quantized_model_content.GetEvalTensor(kLstmInputGateBiasTensor), + quantized_model_content.GetEvalTensor(kLstmForgetGateBiasTensor), + quantized_model_content.GetEvalTensor(kLstmCellGateBiasTensor), + quantized_model_content.GetEvalTensor(kLstmOutputGateBiasTensor), + /*projection_weights=*/nullptr, + /*projection_bias=*/nullptr, &general_model_settings, + /*forward_sequence=*/true, /*time_major=*/false, &evaluation_params, + quantization_settings.output.zero_point, + quantized_model_content.HiddenStateEvalTensor(), + quantized_model_content.CellStateEvalTensor(), + quantized_model_content.OutputEvalTensor(), scratch0, scratch1, scratch2, + scratch3, scratch4, scratch5); + + float dequantized_hidden_state[batch_size * state_dimension] = {}; + Dequantize( + quantized_model_content.GetHiddenStateData(), + batch_size * state_dimension, quantization_settings.hidden_state.scale, + quantization_settings.hidden_state.zero_point, dequantized_hidden_state); + + ValidateResultGoldens(eval_check_data.expected_hidden_state, + dequantized_hidden_state, batch_size * state_dimension, + hidden_state_tolerance); + + float dequantized_cell_state[batch_size * state_dimension] = {}; + Dequantize( + quantized_model_content.GetCellStateData(), batch_size * state_dimension, + quantization_settings.cell_state.scale, + 
quantization_settings.cell_state.zero_point, dequantized_cell_state); + ValidateResultGoldens(eval_check_data.expected_cell_state, + dequantized_cell_state, batch_size * state_dimension, + cell_state_tolerance); + + float dequantized_output[batch_size * state_dimension * time_steps] = {}; + Dequantize(quantized_model_content.GetOutputData(), + batch_size * state_dimension * time_steps, + quantization_settings.output.scale, + quantization_settings.output.zero_point, dequantized_output); + ValidateResultGoldens(eval_check_data.expected_output, dequantized_output, + batch_size * state_dimension, hidden_state_tolerance); +} + +} // namespace testing +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_TEST_COMMOM_H_ diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/micro_ops.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/micro_ops.h index df2a8d2c3..0f9df42e7 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/micro_ops.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/micro_ops.h @@ -70,6 +70,7 @@ TfLiteRegistration Register_MIRROR_PAD(); TfLiteRegistration Register_NEG(); TfLiteRegistration Register_PRELU(); TfLiteRegistration Register_MUL(); +TfLiteRegistration Register_PACK(); TfLiteRegistration Register_PAD(); TfLiteRegistration Register_PADV2(); TfLiteRegistration Register_QUANTIZE(); @@ -113,7 +114,6 @@ TfLiteRegistration Register_LOGICAL_NOT(); TfLiteRegistration Register_MAXIMUM(); TfLiteRegistration Register_MINIMUM(); TfLiteRegistration Register_NOT_EQUAL(); -TfLiteRegistration Register_PACK(); TfLiteRegistration Register_RESHAPE(); TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR(); TfLiteRegistration Register_ROUND(); diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pack.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pack.cc index 5e322b87b..5a4eb4f53 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pack.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pack.cc @@ -1,4 +1,4 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -20,9 +20,7 @@ limitations under the License. #include "tensorflow/lite/micro/micro_log.h" namespace tflite { -namespace ops { -namespace micro { -namespace pack { + namespace { constexpr int kOutputTensor = 0; @@ -106,12 +104,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } } // namespace -} // namespace pack TfLiteRegistration Register_PACK() { - return tflite::micro::RegisterOp(nullptr, nullptr, pack::Eval); + return tflite::micro::RegisterOp(nullptr, nullptr, Eval); } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.cc index d9b147ad8..050913c5a 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.cc @@ -1,4 +1,4 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
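
The pack.cc change above flattens the old tflite::ops::micro::pack namespace and exposes Register_PACK() directly from tflite, registered through tflite::micro::RegisterOp(nullptr, nullptr, Eval). A minimal caller-side sketch of what that looks like to an application, assuming the existing MicroMutableOpResolver API (the op count and helper name below are illustrative, not part of this patch):

    // Sketch only: resolving PACK through the flattened namespace.
    #include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

    // One op slot is enough for this illustration; real applications size the
    // resolver for every operator in their model.
    using PackOnlyResolver = tflite::MicroMutableOpResolver<1>;

    void AddPackKernel(PackOnlyResolver& resolver) {
      // AddPack() binds BuiltinOperator_PACK to tflite::Register_PACK(),
      // which now lives in the top-level tflite namespace rather than
      // tflite::ops::micro.
      resolver.AddPack();
    }
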
@@ -43,7 +43,12 @@ TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
       AveragePoolingEvalFloat(context, node, params, data, input, output);
       break;
     case kTfLiteInt8:
-      AveragePoolingEvalQuantized(context, node, params, data, input, output);
+      AveragePoolingEvalQuantized<int8_t>(context, node, params, data, input,
+                                          output);
+      break;
+    case kTfLiteInt16:
+      AveragePoolingEvalQuantized<int16_t>(context, node, params, data, input,
+                                           output);
       break;
     default:
       MicroPrintf("Input type %s is not currently supported",
@@ -71,7 +76,12 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
       MaxPoolingEvalFloat(context, node, params, data, input, output);
       break;
     case kTfLiteInt8:
-      MaxPoolingEvalQuantized(context, node, params, data, input, output);
+      MaxPoolingEvalQuantized<int8_t>(context, node, params, data, input,
+                                      output);
+      break;
+    case kTfLiteInt16:
+      MaxPoolingEvalQuantized<int16_t>(context, node, params, data, input,
+                                       output);
       break;
     default:
       MicroPrintf("Type %s not currently supported.",
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.h
index 493250ee1..7b322480c 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.h
+++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.h
@@ -20,7 +20,14 @@ limitations under the License.
 
 #include "tensorflow/lite/c/builtin_op_data.h"
 #include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h"
+#include "tensorflow/lite/kernels/internal/reference/pooling.h"
+#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/kernels/padding.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"
 #include "tensorflow/lite/micro/kernels/micro_ops.h"
+#include "tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 
@@ -50,27 +57,69 @@ void AveragePoolingEvalFloat(const TfLiteContext* context,
                              const TfLiteEvalTensor* input,
                              TfLiteEvalTensor* output);
 
+template <typename T>
 void AveragePoolingEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
                                  const TfLitePoolParams* params,
                                  const OpDataPooling* data,
                                  const TfLiteEvalTensor* input,
-                                 TfLiteEvalTensor* output);
+                                 TfLiteEvalTensor* output) {
+  TFLITE_DCHECK(input->type == kTfLiteInt8 || input->type == kTfLiteInt16);
+
+  PoolParams op_params;
+  op_params.stride_height = params->stride_height;
+  op_params.stride_width = params->stride_width;
+  op_params.filter_height = params->filter_height;
+  op_params.filter_width = params->filter_width;
+  op_params.padding_values.height = data->padding.height;
+  op_params.padding_values.width = data->padding.width;
+  op_params.quantized_activation_min = data->activation_min;
+  op_params.quantized_activation_max = data->activation_max;
+
+  reference_integer_ops::AveragePool(op_params,
+                                     tflite::micro::GetTensorShape(input),
+                                     tflite::micro::GetTensorData<T>(input),
+                                     tflite::micro::GetTensorShape(output),
+                                     tflite::micro::GetTensorData<T>(output));
+}
 
 void MaxPoolingEvalFloat(TfLiteContext* context, TfLiteNode* node,
                          TfLitePoolParams* params, const OpDataPooling* data,
                          const TfLiteEvalTensor* input,
                          TfLiteEvalTensor* output);
 
+template <typename T>
 void MaxPoolingEvalQuantized(TfLiteContext* context, TfLiteNode* node,
                              TfLitePoolParams* params,
                              const OpDataPooling* data,
                              const TfLiteEvalTensor* input,
-                             TfLiteEvalTensor* output);
+                             TfLiteEvalTensor* output) {
+  TFLITE_DCHECK(input->type == kTfLiteInt8 || input->type == kTfLiteInt16);
+
+  tflite::PoolParams op_params;
+  op_params.stride_height = params->stride_height;
+  op_params.stride_width = params->stride_width;
+  op_params.filter_height = params->filter_height;
+  op_params.filter_width = params->filter_width;
+  op_params.padding_values.height = data->padding.height;
+  op_params.padding_values.width = data->padding.width;
+  op_params.quantized_activation_min = data->activation_min;
+  op_params.quantized_activation_max = data->activation_max;
+
+  reference_integer_ops::MaxPool(op_params,
+                                 tflite::micro::GetTensorShape(input),
+                                 tflite::micro::GetTensorData<T>(input),
+                                 tflite::micro::GetTensorShape(output),
+                                 tflite::micro::GetTensorData<T>(output));
+}
 
 #if defined(CMSIS_NN)
 TfLiteRegistration Register_AVERAGE_POOL_2D_INT8();
 TfLiteRegistration Register_MAX_POOL_2D_INT8();
+
+TfLiteRegistration Register_AVERAGE_POOL_2D_INT16();
+
+TfLiteRegistration Register_MAX_POOL_2D_INT16();
 #else
 inline TfLiteRegistration Register_AVERAGE_POOL_2D_INT8() {
   return tflite::Register_AVERAGE_POOL_2D();
@@ -79,6 +128,14 @@ inline TfLiteRegistration Register_AVERAGE_POOL_2D_INT8() {
   return tflite::Register_AVERAGE_POOL_2D();
 }
 
 inline TfLiteRegistration Register_MAX_POOL_2D_INT8() {
   return tflite::Register_MAX_POOL_2D();
 }
+
+inline TfLiteRegistration Register_AVERAGE_POOL_2D_INT16() {
+  return tflite::Register_AVERAGE_POOL_2D();
+}
+
+inline TfLiteRegistration Register_MAX_POOL_2D_INT16() {
+  return tflite::Register_MAX_POOL_2D();
+}
 #endif
 
 }  // namespace tflite
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling_common.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling_common.cc
index ddc18f0bb..b39e9d846 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling_common.cc
+++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling_common.cc
@@ -1,4 +1,4 @@
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
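
The pooling registration hooks above follow the existing INT8 pattern: with CMSIS_NN defined they resolve to specialized kernel registrations, and otherwise the new Register_*_POOL_2D_INT16() helpers simply forward to the generic portable kernels. A rough usage sketch, assuming the MicroMutableOpResolver Add* overloads that accept an explicit TfLiteRegistration (an assumption about the existing API, not something introduced by this patch):

    // Sketch only: selecting the int16-capable pooling registrations.
    #include "tensorflow/lite/micro/kernels/pooling.h"
    #include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

    void AddInt16Pooling(tflite::MicroMutableOpResolver<2>& resolver) {
      // Without CMSIS_NN these calls are equivalent to the plain
      // AddAveragePool2D()/AddMaxPool2D() registrations shown in the header.
      resolver.AddAveragePool2D(tflite::Register_AVERAGE_POOL_2D_INT16());
      resolver.AddMaxPool2D(tflite::Register_MAX_POOL_2D_INT16());
    }
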
@@ -69,10 +69,14 @@ TfLiteStatus PoolingPrepare(TfLiteContext* context, TfLiteNode* node) { if (input->type == kTfLiteFloat32) { CalculateActivationRange(params->activation, &data->activation_min_f32, &data->activation_max_f32); - } else if (input->type == kTfLiteInt8) { + } else if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) { CalculateActivationRangeQuantized(context, params->activation, output, &data->activation_min, &data->activation_max); + } else { + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; } micro_context->DeallocateTempTfLiteTensor(input); @@ -102,30 +106,6 @@ void AveragePoolingEvalFloat(const TfLiteContext* context, tflite::micro::GetTensorData<float>(output)); } -void AveragePoolingEvalQuantized(TfLiteContext* context, const TfLiteNode* node, - const TfLitePoolParams* params, - const OpDataPooling* data, - const TfLiteEvalTensor* input, - TfLiteEvalTensor* output) { - TFLITE_DCHECK(input->type == kTfLiteInt8); - - PoolParams op_params; - op_params.stride_height = params->stride_height; - op_params.stride_width = params->stride_width; - op_params.filter_height = params->filter_height; - op_params.filter_width = params->filter_width; - op_params.padding_values.height = data->padding.height; - op_params.padding_values.width = data->padding.width; - op_params.quantized_activation_min = data->activation_min; - op_params.quantized_activation_max = data->activation_max; - - reference_integer_ops::AveragePool( - op_params, tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData<int8_t>(input), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData<int8_t>(output)); -} - void MaxPoolingEvalFloat(TfLiteContext* context, TfLiteNode* node, TfLitePoolParams* params, const OpDataPooling* data, const TfLiteEvalTensor* input, @@ -145,26 +125,4 @@ void MaxPoolingEvalFloat(TfLiteContext* context, TfLiteNode* node, tflite::micro::GetTensorData<float>(output)); } -void MaxPoolingEvalQuantized(TfLiteContext* context, TfLiteNode* node, - TfLitePoolParams* params, - const OpDataPooling* data, - const TfLiteEvalTensor* input, - TfLiteEvalTensor* output) { - tflite::PoolParams op_params; - op_params.stride_height = params->stride_height; - op_params.stride_width = params->stride_width; - op_params.filter_height = params->filter_height; - op_params.filter_width = params->filter_width; - op_params.padding_values.height = data->padding.height; - op_params.padding_values.width = data->padding.width; - op_params.quantized_activation_min = data->activation_min; - op_params.quantized_activation_max = data->activation_max; - - reference_integer_ops::MaxPool(op_params, - tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData<int8_t>(input), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData<int8_t>(output)); -} - } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/BUILD b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/BUILD index e7187ef0d..0698846ff 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/BUILD +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/BUILD @@ -15,3 +15,16 @@ cc_library( hdrs = ["conv_test_data.h"], deps = ["//tensorflow/lite/c:common"], ) + +cc_library( + name = "lstm_test_data", + srcs = ["lstm_test_data.cc"], + hdrs = [ + "lstm_test_data.h", + ], + deps = [ + "//tensorflow/lite/c:common", + "//tensorflow/lite/micro:test_helpers", +
"//tensorflow/lite/micro/kernels:lstm_shared", + ], +) diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.cc new file mode 100644 index 000000000..4cfe1069a --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.cc @@ -0,0 +1,252 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/micro/kernels/testdata/lstm_test_data.h" + +#include + +namespace tflite { +namespace testing { + +namespace { +// LSTM internal setting (e.g., nonlinear activation type) +constexpr TfLiteLSTMParams kDefaultBuiltinData = { + /*.activation=*/kTfLiteActTanh, + /*.cell_clip=*/6, /*.proj_clip=*/3, + /*.kernel_type=*/kTfLiteLSTMFullKernel, + /*.asymmetric_quantize_inputs=*/true}; +} // namespace + +GateOutputCheckData<4, 4> Get2X2GateOutputCheckData() { + GateOutputCheckData<4, 4> gate_data; + const float input_data[4] = { + 0.2, 0.3, // batch1 + -0.98, 0.62 // batch2 + }; + std::memcpy(gate_data.input_data, input_data, 4 * sizeof(float)); + + const float hidden_state[4] = { + -0.1, 0.2, // batch1 + -0.3, 0.5 // batch2 + }; + std::memcpy(gate_data.hidden_state, hidden_state, 4 * sizeof(float)); + + const float cell_state[4] = { + -1.3, 6.2, // batch1 + -7.3, 3.5 // batch2 + }; + std::memcpy(gate_data.cell_state, cell_state, 4 * sizeof(float)); + + // Use the forget gate parameters to test small gate outputs + // output = sigmoid(W_i*i+W_h*h+b) = sigmoid([[-10,-10],[-20,-20]][0.2, + // +[[-10,-10],[-20,-20]][-0.1, 0.2]+[1,2]) = sigmoid([-5,-10]) = + // [6.69285092e-03, 4.53978687e-05] (Batch1) + // Similarly, we have [0.93086158 0.9945137 ] for batch 2 + const float expected_forget_gate_output[4] = {6.69285092e-3f, 4.53978687e-5f, + 0.93086158, 0.9945137}; + std::memcpy(gate_data.expected_forget_gate_output, + expected_forget_gate_output, 4 * sizeof(float)); + + // Use the input gate parameters to test small gate outputs + // output = sigmoid(W_i*i+W_h*h+b) = sigmoid([[10,10],[20,20]][0.2, 0.3] + // +[[10,10],[20,20]][-0.1, 0.2]+[-1,-2]) = sigmoid([5,10]) = + // [0.99330715, 0.9999546] + // Similarly, we have [0.06913842 0.0054863 ] for batch 2 + const float expected_input_gate_output[4] = {0.99330715, 0.9999546, + 0.06913842, 0.0054863}; + std::memcpy(gate_data.expected_input_gate_output, expected_input_gate_output, + 4 * sizeof(float)); + + // Use the output gate parameters to test normnal gate outputs + // output = sigmoid(W_i*i+W_h*h+b) = sigmoid([[1,1],[1,1]][0.2, 0.3] + // +[[1,1],[1,1]][-0.1, 0.2]+[0,0]) = sigmoid([0.6,0.6]) = + // [0.6456563062257954, 0.6456563062257954] + // Similarly, we have [[0.46008512 0.46008512]] for batch 2 + const float expected_output_gate_output[4] = { + 0.6456563062257954, 0.6456563062257954, 0.46008512, 0.46008512}; + std::memcpy(gate_data.expected_output_gate_output, 
+ expected_output_gate_output, 4 * sizeof(float)); + + // Use the cell(modulation) gate parameters to tanh output + // output = tanh(W_i*i+W_h*h+b) = tanh([[1,1],[1,1]][0.2, 0.3] + // +[[1,1],[1,1]][-0.1, 0.2]+[0,0]) = tanh([0.6,0.6]) = + // [0.6456563062257954, 0.6456563062257954] + // Similarly, we have [-0.1586485 -0.1586485] for batch 2 + const float expected_cell_gate_output[4] = { + 0.5370495669980353, 0.5370495669980353, -0.1586485, -0.1586485}; + std::memcpy(gate_data.expected_cell_gate_output, expected_cell_gate_output, + 4 * sizeof(float)); + + // Cell = forget_gate*cell + input_gate*cell_gate + // Note -6.80625824 is clipped to -6 + const float expected_updated_cell[4] = {0.52475447, 0.53730665, -6, + 3.47992756}; + std::memcpy(gate_data.expected_updated_cell, expected_updated_cell, + 4 * sizeof(float)); + + // Use the updated cell state to update the hidden state + // tanh(expected_updated_cell) * expected_output_gate_output + const float expected_updated_hidden[4] = {0.31079388, 0.3169827, -0.46007947, + 0.45921249}; + std::memcpy(gate_data.expected_updated_hidden, expected_updated_hidden, + 4 * sizeof(float)); + return gate_data; +} + +// TODO(b/253466487): document how the golden values are arrived at +LstmEvalCheckData<12, 4, 12> Get2X2LstmEvalCheckData() { + LstmEvalCheckData<12, 4, 12> eval_data; + const float input_data[12] = { + 0.2, 0.3, 0.2, 0.3, 0.2, 0.3, // batch one + -0.98, 0.62, 0.01, 0.99, 0.49, -0.32 // batch two + }; + std::memcpy(eval_data.input_data, input_data, 12 * sizeof(float)); + + // Initialize hidden state as zeros + const float hidden_state[4] = {}; + std::memcpy(eval_data.hidden_state, hidden_state, 4 * sizeof(float)); + + // The expected model output after 3 time steps using the fixed input and + // parameters + const float expected_output[12] = { + 0.26455893, 0.26870455, 0.47935803, + 0.47937014, 0.58013272, 0.58013278, // batch1 + -1.41184672e-3f, -1.43329117e-5f, 0.46887168, + 0.46891281, 0.50054074, 0.50054148 // batch2 + }; + std::memcpy(eval_data.expected_output, expected_output, 12 * sizeof(float)); + + const float expected_hidden_state[4] = { + 0.58013272, 0.58013278, // batch1 + 0.50054074, 0.50054148 // batch2 + }; + std::memcpy(eval_data.expected_hidden_state, expected_hidden_state, + 4 * sizeof(float)); + + const float expected_cell_state[4] = { + 0.89740515, 0.8974053, // batch1 + 0.80327607, 0.80327785 // batch2 + }; + std::memcpy(eval_data.expected_cell_state, expected_cell_state, + 4 * sizeof(float)); + return eval_data; +} + +LstmNodeContents +Create2x3x2X2FloatNodeContents(const float* input_data, + const float* hidden_state_data, + const float* cell_state_data) { + // Parameters for different gates + // negative large weights for forget gate to make it really forget + const GateData forget_gate_data = { + /*.activation_weight=*/{-10, -10, -20, -20}, + /*.recurrent_weight=*/{-10, -10, -20, -20}, + /*.fused_bias=*/{1, 2}, + /*activation_zp_folded_bias=*/{0, 0}, + /*recurrent_zp_folded_bias=*/{0, 0}}; + // positive large weights for input gate to make it really remember + const GateData input_gate_data = { + /*.activation_weight=*/{10, 10, 20, 20}, + /*.recurrent_weight=*/{10, 10, 20, 20}, + /*.fused_bias=*/{-1, -2}, + /*activation_zp_folded_bias=*/{0, 0}, + /*recurrent_zp_folded_bias=*/{0, 0}}; + // all ones to test the behavior of tanh at normal range (-1,1) + const GateData cell_gate_data = { + /*.activation_weight=*/{1, 1, 1, 1}, + /*.recurrent_weight=*/{1, 1, 1, 1}, + /*.fused_bias=*/{0, 0}, + /*activation_zp_folded_bias=*/{0, 
0}, + /*recurrent_zp_folded_bias=*/{0, 0}}; + // all ones to test the behavior of sigmoid at normal range (-1. 1) + const GateData output_gate_data = { + /*.activation_weight=*/{1, 1, 1, 1}, + /*.recurrent_weight=*/{1, 1, 1, 1}, + /*.fused_bias=*/{0, 0}, + /*activation_zp_folded_bias=*/{0, 0}, + /*recurrent_zp_folded_bias=*/{0, 0}}; + + LstmNodeContents float_node_contents( + kDefaultBuiltinData, forget_gate_data, input_gate_data, cell_gate_data, + output_gate_data); + + if (input_data != nullptr) { + float_node_contents.SetInputData(input_data); + } + if (hidden_state_data != nullptr) { + float_node_contents.SetHiddenStateData(hidden_state_data); + } + if (cell_state_data != nullptr) { + float_node_contents.SetCellStateData(cell_state_data); + } + return float_node_contents; +} + +NodeQuantizationParameters Get2X2Int8LstmQuantizationSettings() { + NodeQuantizationParameters quantization_settings; + quantization_settings.activation_type = kTfLiteInt8; + quantization_settings.weight_type = kTfLiteInt8; + quantization_settings.cell_type = kTfLiteInt16; + quantization_settings.bias_type = kTfLiteInt32; + quantization_settings.nonlinear_activation_input_scale = + 0.00024414062; // std::pow(2.0f, -12.0f) + quantization_settings.nonlinear_activation_output_scale = + 0.00003051757; // std::pow(2.0f, -15.0f) + + // state quantization parameters + quantization_settings.input = {/*scale=*/0.00784313725490196, /*zp=*/0, + /*symmetry=*/false}; + quantization_settings.output = {/*scale=*/0.004705882165580988, /*zp=*/-21, + /*symmetry=*/false}; + quantization_settings.hidden_state = {/*scale=*/0.004705882165580988, + /*zp=*/-21, /*symmetry=*/false}; + quantization_settings.cell_state = {/*scale=*/0.00024414062, /*zp=*/0, + /*symmetry=*/true}; + + // gate quantization parameters + quantization_settings.forget_gate = { + {/*scale=*/0.15748031496062992, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.15748031496062992, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.0012351397251814111, /*zp=*/0, /*symmetry=*/true}}; + quantization_settings.input_gate = { + {/*scale=*/0.15748031496062992, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.15748031496062992, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.0012351397251814111, /*zp=*/0, /*symmetry=*/true}}; + quantization_settings.cell_gate = { + {/*scale=*/0.007874015748031496, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.007874015748031496, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/6.175698625907056e-5, /*zp=*/0, /*symmetry=*/true}}; + quantization_settings.output_gate = { + {/*scale=*/0.1, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.1, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.1, /*zp=*/0, /*symmetry=*/true}}; + + return quantization_settings; +} + +LstmNodeContents +Create2x3x2X2Int8NodeContents(const float* input_data, + const float* hidden_state, + const float* cell_state) { + auto float_node_content = + Create2x3x2X2FloatNodeContents(input_data, hidden_state, cell_state); + const auto quantization_settings = Get2X2Int8LstmQuantizationSettings(); + return CreateIntegerNodeContents(quantization_settings, + float_node_content); +} + +} // namespace testing +} // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.h new file mode 100644 index 000000000..538119044 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.h @@ -0,0 +1,535 @@ +/* Copyright 2022 The TensorFlow Authors. 
All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_TESTDATA_LSTM_TEST_DATA_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_TESTDATA_LSTM_TEST_DATA_H_ +#include + +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" +#include "tensorflow/lite/micro/kernels/lstm_shared.h" +#include "tensorflow/lite/micro/test_helpers.h" + +namespace tflite { +namespace testing { +// Data structure to store all the data used to check output of internal gates +// of one time step +// input_size = batch_size*input_dimension (size of the input array) +// gate_output_size = batch_size*state_dimension (size of the gate output) +template +struct GateOutputCheckData { + float input_data[input_size]; + float hidden_state[gate_output_size]; + float cell_state[gate_output_size]; + float expected_forget_gate_output[gate_output_size]; + float expected_input_gate_output[gate_output_size]; + float expected_output_gate_output[gate_output_size]; + float expected_cell_gate_output[gate_output_size]; + float expected_updated_cell[gate_output_size]; + float expected_updated_hidden[gate_output_size]; +}; + +// Data structure to store all the data used to check the output of the kernel +// of multiple batch, multiple timesteps +// input_size = batch_size*time_steps*input_dimension (size of the input array) +// gate_output_size = batch_size*state_dimension (size of the gate output) +// output_size = time_steps*gate_output_size (size of the output from the +// kernel) +template +struct LstmEvalCheckData { + float input_data[input_size]; + float hidden_state[gate_output_size]; + float expected_output[output_size]; + float expected_hidden_state[gate_output_size]; + float expected_cell_state[gate_output_size]; +}; + +// Struct that holds the weight/bias information for a standard gate (i.e. no +// modification such as layer normalization, peephole, etc.) +// Every gate is defined by the type and size of the weights (bias included) +// inside. +// Specifically, types are weight type and bias type (normally the same +// type of MatMul accumulator). +// activation_weight has shape (hidden state dimension * input tensor dimension) +// recurrent_weight has shape (hidden state dimension * hidden state dimension) +// bias has shape (hidden state dimension, 1) +template +struct GateData { + WeightType activation_weight[state_dimension * input_dimension]; + WeightType recurrent_weight[state_dimension * state_dimension]; + BiasType fused_bias[state_dimension]; + // Quantized model folded the zero point of activations into biases: + // bias + zero_point * weight. 
+ BiasType activation_zp_folded_bias[state_dimension]; + BiasType recurrent_zp_folded_bias[state_dimension]; +}; + +// A struct that holds quantization parameters for a LSTM Tensor +struct TensorQuantizationParameters { + double scale; + int zero_point; + bool symmetry; +}; + +// A struct that holds quantization parameters for an internal gate, which is +// defined by activation/recurrent weight and bias (assuming no internal layer +// normalization) +struct GateQuantizationParameters { + TensorQuantizationParameters activation_weight; + TensorQuantizationParameters recurrent_weight; + TensorQuantizationParameters bias; +}; + +// A struct that holds the quantization settings for the LSTM node. Data +// members can be grouped into five parts. +// 1. Data types (activation,weight, cell, bias) +// 2. Non-linear activation (i.e., tanh and sigmoid) fixed point +// calculation settings +// 3. Input/output tensor quantization settings +// 4. Internal state (hidden and cell) quantization settings +// 5. Internal gate (forget, input, cell, output) settings +struct NodeQuantizationParameters { + TfLiteType activation_type; + TfLiteType weight_type; + TfLiteType cell_type; + TfLiteType bias_type; + // Fixed point setting for integer nonlinear activation calculation + double nonlinear_activation_input_scale; + double nonlinear_activation_output_scale; + // Quantization parameters for input/output + TensorQuantizationParameters input; + TensorQuantizationParameters output; + // Quantization parameters for internal states + TensorQuantizationParameters hidden_state; + TensorQuantizationParameters cell_state; + // Quantization parameters for gates + GateQuantizationParameters forget_gate; + GateQuantizationParameters input_gate; + GateQuantizationParameters cell_gate; + GateQuantizationParameters output_gate; +}; + +// Data structure that holds all the information to evaluate a LSTM kernel +// (mimic the LSTM node). +// Tensor Types: +// ActivationType defines the data type of input/output of the layer. The hidden +// state has the ActivationType as well since it is the layer output of the +// previous time. +// WeightType defines the weight data type inside the internal gates. +// BiasType defines the bias data type inside the internal gates. (normally the +// same type of MatMul accumulator). +// Tensor Shapes: +// The input to the layer has shape (batch_size,time_steps,input_dimension). +// Both the hidden state and cell state has shape (state_dimension, 1) +// The output of the layer has shape (batch_size,time_steps,state_dimension) +// Note: state values can change through calls (stateful) +template +class LstmNodeContents { + public: + LstmNodeContents(const LstmNodeContents& other) = default; + LstmNodeContents& operator=(const LstmNodeContents& other) = default; + // Use the general model setting (builtin data) and the four gates data to + // construct the node content. 
Note the input, hidden state, and cell state + // data is provided later for flexible testing (initialize as zero now) + LstmNodeContents( + const TfLiteLSTMParams builtin_data, + const GateData + forget_gate_params, + const GateData + input_gate_params, + const GateData + cell_gate_params, + const GateData + output_gate_params) + : builtin_data_(builtin_data), + forget_gate_data_(forget_gate_params), + input_gate_data_(input_gate_params), + cell_gate_data_(cell_gate_params), + output_gate_data_(output_gate_params) { + InitializeTensors(); + } + + // Add quantization parameters (scale, zero point) to tensors + // Only required for the integer kernel + void AddQuantizationParameters( + const NodeQuantizationParameters& quantization_params) { + quantization_settings_ = quantization_params; + // Input Tensor + SetTensorQuantizationParam(kLstmInputTensor, quantization_params.input); + // Forget Gate Tensors + const auto& forget_gate_quant_param = quantization_params.forget_gate; + SetTensorQuantizationParam(kLstmInputToForgetWeightsTensor, + forget_gate_quant_param.activation_weight); + SetTensorQuantizationParam(kLstmRecurrentToForgetWeightsTensor, + forget_gate_quant_param.recurrent_weight); + SetTensorQuantizationParam(kLstmForgetGateBiasTensor, + forget_gate_quant_param.bias); + // Input Gate Tensors + const auto& input_gate_quant_param = quantization_params.input_gate; + SetTensorQuantizationParam(kLstmInputToInputWeightsTensor, + input_gate_quant_param.activation_weight); + SetTensorQuantizationParam(kLstmRecurrentToInputWeightsTensor, + input_gate_quant_param.recurrent_weight); + SetTensorQuantizationParam(kLstmInputGateBiasTensor, + input_gate_quant_param.bias); + // Cell Gate Tensors + const auto& cell_gate_quant_param = quantization_params.cell_gate; + SetTensorQuantizationParam(kLstmInputToCellWeightsTensor, + cell_gate_quant_param.activation_weight); + SetTensorQuantizationParam(kLstmRecurrentToCellWeightsTensor, + cell_gate_quant_param.recurrent_weight); + SetTensorQuantizationParam(kLstmCellGateBiasTensor, + cell_gate_quant_param.bias); + // Output Gate Tensors + const auto& output_gate_quant_param = quantization_params.output_gate; + SetTensorQuantizationParam(kLstmInputToOutputWeightsTensor, + output_gate_quant_param.activation_weight); + SetTensorQuantizationParam(kLstmRecurrentToOutputWeightsTensor, + output_gate_quant_param.recurrent_weight); + SetTensorQuantizationParam(kLstmOutputGateBiasTensor, + output_gate_quant_param.bias); + // State Tensors + SetTensorQuantizationParam(kLstmOutputStateTensor, + quantization_params.hidden_state); + SetTensorQuantizationParam(kLstmCellStateTensor, + quantization_params.cell_state); + // Output Tensor + SetTensorQuantizationParam(24, quantization_params.output); + } + + // Provide interface to set the input tensor values for flexible testing + void SetInputData(const ActivationType* data) { + std::memcpy( + input_, data, + batch_size * input_dimension * time_steps * sizeof(ActivationType)); + SetTensor(kLstmInputTensor, input_, input_size_); + } + const ActivationType* GetInputData() const { return input_; } + + // Provide interface to set the hidden state tensor values for flexible + // testing + void SetHiddenStateData(const ActivationType* data) { + std::memcpy(hidden_state_, data, + batch_size * state_dimension * sizeof(ActivationType)); + } + ActivationType* GetHiddenStateData() { return hidden_state_; } + + // Provide interface to set the cell state tensor values for flexible + // testing + void SetCellStateData(const CellType* 
data) { + std::memcpy(cell_state_, data, + batch_size * state_dimension * sizeof(CellType)); + } + CellType* GetCellStateData() { return cell_state_; } + ActivationType* GetOutputData() { return output_; } + + // Internal tensors, fixed (const). see lstm_shared.h for tensor names + const TfLiteEvalTensor* GetEvalTensor(const int tensor_index) const { + auto valid_index = input_tensor_indeces_[tensor_index + 1]; + if (valid_index < 0) { + return nullptr; + } + return &eval_tensors_[tensor_index]; + } + + // Variable tensors (will be changed, can not be const) + TfLiteEvalTensor* HiddenStateEvalTensor() { + return &eval_tensors_[kLstmOutputStateTensor]; + } + TfLiteEvalTensor* CellStateEvalTensor() { + return &eval_tensors_[kLstmCellStateTensor]; + } + TfLiteEvalTensor* OutputEvalTensor() { return &eval_tensors_[24]; } + + const GateData& + ForgetGateData() const { + return forget_gate_data_; + } + const GateData& + InputGateData() const { + return input_gate_data_; + } + const GateData& + CellGateData() const { + return cell_gate_data_; + } + const GateData& + OutputGateData() const { + return output_gate_data_; + } + + const TfLiteLSTMParams BuiltinData() const { return builtin_data_; } + + const NodeQuantizationParameters QuantizationSettings() const { + return quantization_settings_; + } + + private: + void InitializeTensors() { + // Invalid all the input tensors untill we set it + input_tensor_indeces_[0] = 24; // tot elements + for (size_t i = 1; i < 25; i++) { + input_tensor_indeces_[i] = kTfLiteOptionalTensor; + } + // Input Tensor + SetTensor(kLstmInputTensor, input_, input_size_); + // Forget Gate Tensors + SetTensor(kLstmInputToForgetWeightsTensor, + forget_gate_data_.activation_weight, activation_weight_size_); + SetTensor(kLstmRecurrentToForgetWeightsTensor, + forget_gate_data_.recurrent_weight, recurrent_weight_size_); + SetTensor(kLstmForgetGateBiasTensor, forget_gate_data_.fused_bias, + bias_size_); + // Input Gate Tensors + SetTensor(kLstmInputToInputWeightsTensor, + input_gate_data_.activation_weight, activation_weight_size_); + SetTensor(kLstmRecurrentToInputWeightsTensor, + input_gate_data_.recurrent_weight, recurrent_weight_size_); + SetTensor(kLstmInputGateBiasTensor, input_gate_data_.fused_bias, + bias_size_); + // Cell Gate Tensors + SetTensor(kLstmInputToCellWeightsTensor, cell_gate_data_.activation_weight, + activation_weight_size_); + SetTensor(kLstmRecurrentToCellWeightsTensor, + cell_gate_data_.recurrent_weight, recurrent_weight_size_); + SetTensor(kLstmCellGateBiasTensor, cell_gate_data_.fused_bias, bias_size_); + // Output Gate Tensors + SetTensor(kLstmInputToOutputWeightsTensor, + output_gate_data_.activation_weight, activation_weight_size_); + SetTensor(kLstmRecurrentToOutputWeightsTensor, + output_gate_data_.recurrent_weight, recurrent_weight_size_); + SetTensor(kLstmOutputGateBiasTensor, output_gate_data_.fused_bias, + bias_size_); + // State Tensors + SetTensor(kLstmOutputStateTensor, hidden_state_, state_size_); + SetTensor(kLstmCellStateTensor, cell_state_, state_size_); + // // Output Tensor + SetTensor(24, output_, output_size_); + } + + template + void SetTensor(const int index, const T* data, int* dims) { + // Lite tensors for kernel level testing + tensors_[index].data.data = const_cast(data); + tensors_[index].dims = IntArrayFromInts(dims); + tensors_[index].type = typeToTfLiteType(); + // Eval tensors for internal computation testing + eval_tensors_[index].data.data = const_cast(data); + eval_tensors_[index].dims = IntArrayFromInts(dims); + 
eval_tensors_[index].type = typeToTfLiteType(); + // update the index + input_tensor_indeces_[index + 1] = index; + } + + void SetTensorQuantizationParam( + const int index, const TensorQuantizationParameters& quant_param) { + tensors_[index].params.scale = quant_param.scale; + tensors_[index].params.zero_point = quant_param.zero_point; + } + + const TfLiteLSTMParams builtin_data_; + GateData + forget_gate_data_; + GateData + input_gate_data_; + GateData + cell_gate_data_; + GateData + output_gate_data_; + + // Keep to ease the testing process (although all quantization information can + // be obtained from individual tensors, they are well organized here and light + // weighted) + NodeQuantizationParameters quantization_settings_; + + // Not const since IntArrayFromInts takes int *; the first element of the + // array must be the size of the array + int input_size_[4] = {3, batch_size, time_steps, input_dimension}; + int output_size_[4] = {3, batch_size, time_steps, state_dimension}; + int activation_weight_size_[3] = {2, state_dimension, input_dimension}; + int recurrent_weight_size_[3] = {2, state_dimension, state_dimension}; + int bias_size_[2] = {1, state_dimension}; + int state_size_[3] = {2, batch_size, state_dimension}; + + // see lstm_shared.h for tensor names, the last tensor is the output tensor + TfLiteTensor tensors_[24 + 1]; + // Use for internel kernel testing + TfLiteEvalTensor eval_tensors_[24 + 1]; + // indices for the tensors inside the node (required by kernel runner) + int input_tensor_indeces_[1 + 24] = {}; + // single output (last in the tensors array) + int output_tensor_indeces_[2] = {1, 24}; + + // tennsor data + // states are initialized to zero + ActivationType hidden_state_[batch_size * state_dimension] = {}; + CellType cell_state_[batch_size * state_dimension] = {}; + // input is defined in the ModelContent (const across all derived models) + ActivationType input_[batch_size * input_dimension * time_steps] = {}; + ActivationType output_[batch_size * state_dimension * time_steps] = {}; +}; + +// Convert floating point gate data to the corresponding quantized version +template +GateData +CreateQuantizedGateData( + const GateData& + gate_parameters, + const TensorQuantizationParameters& input_quantization_params, + const TensorQuantizationParameters& output_quantization_params, + const GateQuantizationParameters& gate_quantization_params) { + GateData + quantized_gate_data; + tflite::SymmetricQuantize(gate_parameters.activation_weight, + quantized_gate_data.activation_weight, + state_dimension * input_dimension, + gate_quantization_params.activation_weight.scale); + tflite::SymmetricQuantize(gate_parameters.recurrent_weight, + quantized_gate_data.recurrent_weight, + state_dimension * state_dimension, + gate_quantization_params.recurrent_weight.scale); + tflite::SymmetricQuantize(gate_parameters.fused_bias, + quantized_gate_data.fused_bias, state_dimension, + gate_quantization_params.bias.scale); + + // Copy the bias values to prepare zero_point folded bias precomputation. bias + // has same scale as input_scale*input_weight_scale) + std::memcpy(quantized_gate_data.activation_zp_folded_bias, + quantized_gate_data.fused_bias, + state_dimension * sizeof(BiasType)); + // Pre-calculate bias - zero_point * weight (a constant). 
+ tflite::tensor_utils::MatrixScalarMultiplyAccumulate( + quantized_gate_data.activation_weight, + -1 * input_quantization_params.zero_point, state_dimension, + input_dimension, quantized_gate_data.activation_zp_folded_bias); + + // Initialize the folded bias to zeros for accumulation + for (size_t i = 0; i < state_dimension; i++) { + quantized_gate_data.recurrent_zp_folded_bias[i] = 0; + } + // Calculate : -zero_point * weight since it is a constant + tflite::tensor_utils::MatrixScalarMultiplyAccumulate( + quantized_gate_data.recurrent_weight, + -1 * output_quantization_params.zero_point, state_dimension, + state_dimension, quantized_gate_data.recurrent_zp_folded_bias); + + return quantized_gate_data; +} + +// Create integer LSTM node content from the float node contents and +// quantization settings +template +LstmNodeContents +CreateIntegerNodeContents( + const NodeQuantizationParameters& quantization_settings, + LstmNodeContents& float_node_contents) { + const auto quantized_forget_gate_data = + CreateQuantizedGateData( + float_node_contents.ForgetGateData(), quantization_settings.input, + quantization_settings.output, quantization_settings.forget_gate); + const auto quantized_input_gate_data = + CreateQuantizedGateData( + float_node_contents.InputGateData(), quantization_settings.input, + quantization_settings.output, quantization_settings.input_gate); + const auto quantized_cell_gate_data = + CreateQuantizedGateData( + float_node_contents.CellGateData(), quantization_settings.input, + quantization_settings.output, quantization_settings.cell_gate); + const auto quantized_output_gate_params = + CreateQuantizedGateData( + float_node_contents.OutputGateData(), quantization_settings.input, + quantization_settings.output, quantization_settings.output_gate); + LstmNodeContents + quantized_node_content( + float_node_contents.BuiltinData(), quantized_forget_gate_data, + quantized_input_gate_data, quantized_cell_gate_data, + quantized_output_gate_params); + + // Quantize the floating point input + ActivationType quantized_input[batch_size * input_dimension * time_steps] = + {}; + Quantize(float_node_contents.GetInputData(), quantized_input, + batch_size * input_dimension * time_steps, + quantization_settings.input.scale, + quantization_settings.input.zero_point); + quantized_node_content.SetInputData(quantized_input); + // Quantize the floating point hidden state + ActivationType quantized_hidden_state[batch_size * state_dimension] = {}; + Quantize(float_node_contents.GetHiddenStateData(), quantized_hidden_state, + batch_size * state_dimension, + quantization_settings.hidden_state.scale, + quantization_settings.hidden_state.zero_point); + quantized_node_content.SetHiddenStateData(quantized_hidden_state); + // Quantize the floating point cell state + CellType quantized_cell_state[batch_size * state_dimension] = {}; + Quantize(float_node_contents.GetCellStateData(), quantized_cell_state, + batch_size * state_dimension, quantization_settings.cell_state.scale, + quantization_settings.cell_state.zero_point); + quantized_node_content.SetCellStateData(quantized_cell_state); + + // Add scale and zero point to tensors + quantized_node_content.AddQuantizationParameters(quantization_settings); + return quantized_node_content; +} + +// Get the gate output data (one time step) for a simple 2X2 model +// batch_size = 2; time_steps = 1; input_dimension = 2; state_dimension = 2 +// input_size = batch_size*time_steps*input_dimension = 4 +// gate_output_size = batch_size*state_dimension = 4 
+GateOutputCheckData<4, 4> Get2X2GateOutputCheckData(); + +// Get the kernel output data for a simple 2X2 model +// batch_size = 2; time_steps = 3; input_dimension = 2; state_dimension = 2 +// input_size = batch_size*time_steps*input_dimension = 12 +// gate_output_size = batch_size*state_dimension = 4 +// output_size = time_steps*gate_output_size = 12 +LstmEvalCheckData<12, 4, 12> Get2X2LstmEvalCheckData(); + +// Create a 2x2 float node content +// batch_size = 2; time_steps = 3; input_dimension = 2; state_dimension = 2 +LstmNodeContents +Create2x3x2X2FloatNodeContents(const float* input_data = nullptr, + const float* hidden_state = nullptr, + const float* cell_state = nullptr); + +// Get the quantization settings for the 2X2 model +NodeQuantizationParameters Get2X2Int8LstmQuantizationSettings(); + +// Create int8 (activation) x int8 (weight) -> int16 (cell) node +// batch_size = 2; time_steps = 3; input_dimension = 2; state_dimension = 2 +// input is in float format since the source of truth is always the float +// configuration +LstmNodeContents +Create2x3x2X2Int8NodeContents(const float* input_data = nullptr, + const float* hidden_state = nullptr, + const float* cell_state = nullptr); + +} // namespace testing +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_TESTDATA_LSTM_TEST_DATA_H_ diff --git a/third_party/tflite-micro/tensorflow/lite/micro/micro_mutable_op_resolver.h b/third_party/tflite-micro/tensorflow/lite/micro/micro_mutable_op_resolver.h index a4d50c83a..7a231ff95 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/micro_mutable_op_resolver.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_mutable_op_resolver.h @@ -399,8 +399,7 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddPack() { - return AddBuiltin(BuiltinOperator_PACK, tflite::ops::micro::Register_PACK(), - ParsePack); + return AddBuiltin(BuiltinOperator_PACK, Register_PACK(), ParsePack); } TfLiteStatus AddPad(const TfLiteRegistration& registration = Register_PAD()) { diff --git a/third_party/tflite-micro/tensorflow/lite/micro/micro_profiler.cc b/third_party/tflite-micro/tensorflow/lite/micro/micro_profiler.cc index 9f6fc74c9..e9eb5e549 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/micro_profiler.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_profiler.cc @@ -52,7 +52,7 @@ void MicroProfiler::Log() const { #if !defined(TF_LITE_STRIP_ERROR_STRINGS) for (int i = 0; i < num_events_; ++i) { uint32_t ticks = end_ticks_[i] - start_ticks_[i]; - MicroPrintf("%s took %u ticks (%d ms).", tags_[i], ticks, + MicroPrintf("%s took %" PRIu32 " ticks (%d ms).", tags_[i], ticks, TicksToMs(ticks)); } #endif @@ -63,7 +63,7 @@ void MicroProfiler::LogCsv() const { MicroPrintf("\"Event\",\"Tag\",\"Ticks\""); for (int i = 0; i < num_events_; ++i) { uint32_t ticks = end_ticks_[i] - start_ticks_[i]; - MicroPrintf("%d,%s,%u", i, tags_[i], ticks); + MicroPrintf("%d,%s,%" PRIu32, i, tags_[i], ticks); } #endif } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifimini.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifimini.sh new file mode 100755 index 000000000..abfe651c6 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifimini.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# Called with the following arguments: +# 1 - (optional) TENSORFLOW_ROOT: path to root of the TFLM tree +# (relative to the directory from where the script is called). +# 2 - (optional) EXTERNAL_DIR: path to the external directory that contains external code + +set -e +pwd + +TENSORFLOW_ROOT=${1} +EXTERNAL_DIR=${2} + +source ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/ci_build/helper_functions.sh + +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# TODO(b/143904317): downloading first to allow for parallel builds. +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile third_party_downloads TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile \ + TARGET=xtensa \ + TARGET_ARCH=hifimini \ + OPTIMIZED_KERNEL_DIR=xtensa \ + XTENSA_CORE=mini1m1m_RG \ + TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \ + EXTERNAL_DIR=${EXTERNAL_DIR} \ + build -j$(nproc) + +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile \ + TARGET=xtensa \ + TARGET_ARCH=hifimini \ + OPTIMIZED_KERNEL_DIR=xtensa \ + XTENSA_CORE=mini1m1m_RG \ + TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \ + EXTERNAL_DIR=${EXTERNAL_DIR} \ + test -j$(nproc) \ No newline at end of file diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver/templates/micro_mutable_op_resolver_test.cc.mako b/third_party/tflite-micro/tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver/templates/micro_mutable_op_resolver_test.cc.mako index 68176c566..3de4ef406 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver/templates/micro_mutable_op_resolver_test.cc.mako +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver/templates/micro_mutable_op_resolver_test.cc.mako @@ -75,7 +75,7 @@ void RunModel(const uint8_t* model, TfLiteTensor* output_tensor = interpreter.output(0); TF_LITE_MICRO_EXPECT_EQ(output_tensor->bytes, golden_size * sizeof(int8_t)); - int8_t* output = GetTensorData<int8_t>(output_tensor); + int8_t* output = ::tflite::GetTensorData<int8_t>(output_tensor); for (uint32_t i = 0; i < golden_size; i++) { // TODO(b/205046520): Better understand why TfLite and TFLM can sometimes be // off by 1.
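The golden gate values in lstm_test_data.cc above are a single matrix step per gate, output = activation(W_x*x + W_h*h + b), evaluated on the fixed 2x2 weights. A quick standalone check (illustrative code, not part of the patch) reproduces the batch-1 forget-gate numbers 6.69285092e-03 and 4.53978687e-05 quoted in the comments:

#include <cmath>
#include <cstdio>

int main() {
  // Forget gate parameters from Create2x3x2X2FloatNodeContents():
  // W_x = W_h = [[-10, -10], [-20, -20]], b = [1, 2];
  // batch-1 input x = [0.2, 0.3], hidden state h = [-0.1, 0.2].
  const float w[2][2] = {{-10, -10}, {-20, -20}};
  const float x[2] = {0.2f, 0.3f};
  const float h[2] = {-0.1f, 0.2f};
  const float b[2] = {1.0f, 2.0f};
  for (int row = 0; row < 2; ++row) {
    float acc = b[row];
    for (int col = 0; col < 2; ++col) {
      acc += w[row][col] * x[col] + w[row][col] * h[col];
    }
    // acc comes out as -5 and -10; the sigmoid gives ~6.692851e-03 and
    // ~4.539787e-05, matching expected_forget_gate_output for batch 1.
    std::printf("forget_gate[%d] = %e\n", row, 1.0f / (1.0f + std::exp(-acc)));
  }
  return 0;
}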
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/person_detection_int8_vela_convert.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/person_detection_int8_vela_convert.sh index da51d756a..665c6a62e 100755 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/person_detection_int8_vela_convert.sh +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/person_detection_int8_vela_convert.sh @@ -55,6 +55,8 @@ if [ ! -f ${CONVERTED_PERSON_MODEL_INT8} ]; then source $TEMPFILE/bin/activate python3 -m pip install --upgrade pip >&2 pip install --upgrade cython >&2 + pip install numpy==1.21.3 >&2 # Some types are removed in the latest numpy. + # Use an older version until the ethos-u-vela package is updated. pip install --prefer-binary ethos-u-vela >&2 vela --accelerator-config=ethos-u55-256 ${DOWNLOADS_DIR}/../../../models/person_detect.tflite \ --output-dir ${MODEL_DIR} >&2 diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/xtensa.inc b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/xtensa.inc index 20ee1e4e0..e21bcf953 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/xtensa.inc +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/xtensa.inc @@ -16,7 +16,13 @@ MICROLITE_CC_KERNEL_SRCS += \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/softmax_int8_int16.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/softmax_vision.cc -ifeq ($(TARGET_ARCH), hifi5) +ifeq ($(TARGET_ARCH), hifimini) + # hifimini optimizations are implemented in the TFLM repository itself. + THIRD_PARTY_KERNEL_CC_SRCS += \ + $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/hifimini/svdf.cc \ + $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/hifimini/fully_connected.cc + +else ifeq ($(TARGET_ARCH), hifi5) DOWNLOAD_RESULT := $(shell $(MAKEFILE_DIR)/ext_libs/xtensa_download.sh ${DOWNLOADS_DIR} hifi5 $(TENSORFLOW_ROOT)) ifneq ($(DOWNLOAD_RESULT), SUCCESS) $(error Something went wrong with the xtensa download: $(DOWNLOAD_RESULT)) @@ -124,7 +130,7 @@ else ifeq ($(TARGET_ARCH), vision_p6) INCLUDES += \ -I$(NNLIB_PATH)/flk/include \ -I$(NNLIB_PATH)/kernels/include/ \ - -I$(NNLIB_PATH)/runtime/include/ + -I$(NNLIB_PATH)/runtime/include/ LDFLAGS += -lidma else @@ -141,4 +147,10 @@ THIRD_PARTY_KERNEL_CC_SRCS += \ THIRD_PARTY_CC_HDRS += \ $(shell find $(FFT_PATH)/hifi3_fft -name "*.h") +else ifeq ($(TARGET_ARCH), hifimini) +THIRD_PARTY_KERNEL_CC_SRCS += \ + $(shell find $(FFT_PATH)/hifi2_fft -name "*.c") + +THIRD_PARTY_CC_HDRS += \ + $(shell find $(FFT_PATH)/hifi2_fft -name "*.h") endif
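The zero-point folding in CreateQuantizedGateData() above rests on the identity sum_j w[i][j]*(q[j] - zp) = sum_j w[i][j]*q[j] - zp*sum_j w[i][j]: the second term is a constant, so it is folded into the bias once (this is what the MatrixScalarMultiplyAccumulate calls with -zero_point accumulate), and the kernel can then multiply the raw quantized activations directly. A minimal sketch of the same precomputation with made-up weights and biases (not the patch's actual code path):

#include <cstdint>
#include <cstdio>

int main() {
  // Illustrative 2x2 weights and int32 biases; zero_point -21 mirrors the
  // hidden-state zero point in Get2X2Int8LstmQuantizationSettings().
  const int8_t weight[2][2] = {{3, -4}, {5, 6}};
  const int32_t bias[2] = {100, -50};
  const int32_t zero_point = -21;

  for (int row = 0; row < 2; ++row) {
    int32_t row_sum = 0;
    for (int col = 0; col < 2; ++col) {
      row_sum += weight[row][col];
    }
    // folded_bias = bias - zero_point * (sum of the weights in this row)
    const int32_t folded_bias = bias[row] - zero_point * row_sum;
    std::printf("row %d: folded bias = %d\n", row,
                static_cast<int>(folded_bias));
  }
  return 0;
}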