From 487c3b3efb5449a4cd2d6924d7e27fc78e29ba70 Mon Sep 17 00:00:00 2001 From: CFU-Playground-Bot Date: Tue, 7 Mar 2023 00:34:50 +0000 Subject: [PATCH] Sync from tflite-micro at a4dc639. Signed-off-by: CFU-Playground-Bot --- conf/tflite-micro.version | 2 +- .../tensorflow/lite/c/builtin_op_data.h | 6 +- .../tensorflow/lite/c/c_api_types.h | 10 +- .../tflite-micro/tensorflow/lite/c/common.h | 4 +- .../tensorflow/lite/core/c/common.cc | 28 +- .../tensorflow/lite/core/c/common.h | 36 +- .../tensorflow/lite/kernels/internal/common.h | 194 +- .../internal/reference/integer_ops/add.h | 23 +- .../internal/reference/integer_ops/conv.h | 15 - .../reference/integer_ops/depthwise_conv.h | 16 - .../reference/integer_ops/fully_connected.h | 138 +- .../internal/reference/integer_ops/mean.h | 65 +- .../internal/reference/integer_ops/mul.h | 10 +- .../lite/kernels/internal/reference/reduce.h | 135 +- .../tensorflow/lite/kernels/kernel_util.cc | 2 +- .../tensorflow/lite/kernels/kernel_util.h | 2 +- .../lite/micro/examples/hello_world/BUILD | 92 +- .../micro/examples/hello_world/Makefile.inc | 45 +- .../lite/micro/examples/hello_world/README.md | 33 +- .../micro/examples/hello_world/evaluate.py | 131 ++ .../examples/hello_world/evaluate_test.cc | 142 ++ .../examples/hello_world/evaluate_test.py | 103 ++ .../hello_world/images/hello_world_tflite.png | Bin 0 -> 27649 bytes .../hello_world/images/hello_world_tflm.png | Bin 0 -> 27448 bytes .../micro/examples/hello_world/models/BUILD | 24 + .../models/hello_world_float.tflite | Bin 0 -> 3164 bytes .../lite/micro/examples/hello_world/train.py | 141 ++ .../train/train_hello_world_model.ipynb | 2 - .../train/train_hello_world_model.py | 2 - .../micro/examples/micro_speech/Makefile.inc | 10 +- .../lite/micro/examples/mnist_lstm/BUILD | 3 +- .../micro/examples/mnist_lstm/evaluate.py | 99 +- .../examples/mnist_lstm/evaluate_test.py | 137 +- .../mnist_lstm/trained_lstm_int8.tflite | Bin 0 -> 13952 bytes .../examples/person_detection/Makefile.inc | 10 +- .../lite/micro/kernels/activations_common.cc | 4 +- .../tensorflow/lite/micro/kernels/ceil.cc | 14 +- .../lite/micro/kernels/comparisons.cc | 30 +- .../lite/micro/kernels/concatenation.cc | 14 +- .../tensorflow/lite/micro/kernels/conv.cc | 12 +- .../lite/micro/kernels/conv_test.cc | 6 + .../lite/micro/kernels/depthwise_conv.cc | 20 +- .../lite/micro/kernels/depthwise_conv_test.cc | 186 +- .../lite/micro/kernels/elementwise.cc | 4 + .../tensorflow/lite/micro/kernels/floor.cc | 14 +- .../lite/micro/kernels/fully_connected.cc | 30 +- .../lite/micro/kernels/fully_connected.h | 2 +- .../micro/kernels/fully_connected_common.cc | 3 +- .../lite/micro/kernels/gather_nd.cc | 7 + .../lite/micro/kernels/kernel_util.cc | 20 + .../lite/micro/kernels/kernel_util.h | 8 + .../tensorflow/lite/micro/kernels/l2norm.cc | 13 +- .../lite/micro/kernels/leaky_relu_common.cc | 9 +- .../lite/micro/kernels/lstm_eval.cc | 1616 +++-------------- .../tensorflow/lite/micro/kernels/lstm_eval.h | 798 +++++--- .../lite/micro/kernels/lstm_eval_common.cc | 326 ++++ .../lite/micro/kernels/lstm_eval_test.h | 817 +++++++++ .../lite/micro/kernels/lstm_shared.h | 83 + .../lite/micro/kernels/maximum_minimum.cc | 24 +- .../tensorflow/lite/micro/kernels/micro_ops.h | 51 +- .../lite/micro/kernels/mul_common.cc | 16 +- .../tensorflow/lite/micro/kernels/pack.cc | 11 +- .../tensorflow/lite/micro/kernels/pooling.cc | 16 +- .../tensorflow/lite/micro/kernels/pooling.h | 65 +- .../lite/micro/kernels/pooling_common.cc | 54 +- .../tensorflow/lite/micro/kernels/reshape.cc | 7 
+- .../micro/kernels/resize_nearest_neighbor.cc | 15 +- .../tensorflow/lite/micro/kernels/split.cc | 13 +- .../tensorflow/lite/micro/kernels/split_v.cc | 13 +- .../lite/micro/kernels/squared_difference.cc | 125 +- .../lite/micro/kernels/strided_slice.cc | 15 +- .../lite/micro/kernels/sub_common.cc | 10 +- .../lite/micro/kernels/svdf_common.cc | 16 +- .../tensorflow/lite/micro/kernels/tanh.cc | 17 +- .../lite/micro/kernels/testdata/BUILD | 47 + .../micro/kernels/testdata/lstm_test_data.cc | 309 ++++ .../micro/kernels/testdata/lstm_test_data.h | 579 ++++++ .../testdata/lstm_test_data_generator.py | 192 ++ .../testdata/lstm_test_data_generator_test.py | 108 ++ .../kernels/testdata/lstm_test_data_utils.py | 531 ++++++ .../kernels/unidirectional_sequence_lstm.cc | 1435 ++------------- .../kernels/unidirectional_sequence_lstm.h | 47 + .../tensorflow/lite/micro/kernels/unpack.cc | 12 +- .../lite/micro/micro_allocation_info.cc | 10 +- .../tensorflow/lite/micro/micro_allocator.cc | 50 +- .../tensorflow/lite/micro/micro_allocator.h | 10 +- .../lite/micro/micro_mutable_op_resolver.h | 69 +- .../tensorflow/lite/micro/micro_profiler.cc | 4 +- .../lite/micro/micro_resource_variable.cc | 18 +- .../lite/micro/micro_resource_variable.h | 2 + .../lite/micro/recording_micro_allocator.cc | 9 +- .../lite/micro/recording_micro_allocator.h | 7 +- .../tensorflow/lite/micro/tools/BUILD | 34 + .../lite/micro/tools/ci_build/test_bazel.sh | 20 +- .../micro/tools/ci_build/test_bazel_asan.sh | 34 + .../micro/tools/ci_build/test_bazel_msan.sh | 34 + .../tools/ci_build/test_bazel_tflite_tools.sh | 26 + .../ci_build/test_bluepill_no_release.sh | 41 + .../tools/ci_build/test_bluepill_release.sh | 38 + .../tools/ci_build/test_bluepill_renode.sh | 39 + .../micro/tools/ci_build/test_code_style.sh | 1 + .../tools/ci_build/test_stm32f4_no_release.sh | 39 + .../tools/ci_build/test_stm32f4_release.sh | 37 + .../micro/tools/ci_build/test_x86_default.sh | 42 + .../test_x86_no_tflite_static_memory.sh | 42 + .../tools/ci_build/test_x86_out_of_tree.sh | 46 + .../micro/tools/ci_build/test_x86_release.sh | 46 + .../tools/ci_build/test_xtensa_hifimini.sh | 50 + .../micro_mutable_op_resolver_test.cc.mako | 2 +- .../tensorflow/lite/micro/tools/make/Makefile | 7 +- .../tools/make/ext_libs/cmsis_nn_download.sh | 6 +- .../lite/micro/tools/make/ext_libs/xtensa.inc | 19 +- .../micro/tools/make/helper_functions.inc | 10 +- .../lite/micro/tools/make/test_latency_log.sh | 54 + .../micro/tools/project_generation/Makefile | 6 +- .../lite/micro/tools/requantize_flatbuffer.py | 222 +++ .../micro/tools/requantize_flatbuffer_test.py | 115 ++ .../tools/requantize_flatbuffer_utils.py | 325 ++++ 118 files changed, 6840 insertions(+), 4098 deletions(-) create mode 100644 third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/evaluate.py create mode 100644 third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/evaluate_test.cc create mode 100644 third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/evaluate_test.py create mode 100644 third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/images/hello_world_tflite.png create mode 100644 third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/images/hello_world_tflm.png create mode 100644 third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/models/BUILD create mode 100644 third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/models/hello_world_float.tflite create mode 100644 
third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/train.py create mode 100644 third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/trained_lstm_int8.tflite create mode 100644 third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval_common.cc create mode 100644 third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval_test.h create mode 100644 third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.cc create mode 100644 third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.h create mode 100644 third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data_generator.py create mode 100644 third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data_generator_test.py create mode 100644 third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data_utils.py create mode 100644 third_party/tflite-micro/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.h create mode 100755 third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bazel_asan.sh create mode 100755 third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bazel_msan.sh create mode 100755 third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bazel_tflite_tools.sh create mode 100755 third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bluepill_no_release.sh create mode 100755 third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bluepill_release.sh create mode 100755 third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bluepill_renode.sh create mode 100755 third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_stm32f4_no_release.sh create mode 100755 third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_stm32f4_release.sh create mode 100755 third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_x86_default.sh create mode 100755 third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_x86_no_tflite_static_memory.sh create mode 100755 third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_x86_out_of_tree.sh create mode 100755 third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_x86_release.sh create mode 100755 third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifimini.sh create mode 100755 third_party/tflite-micro/tensorflow/lite/micro/tools/make/test_latency_log.sh create mode 100644 third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer.py create mode 100644 third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer_test.py create mode 100644 third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer_utils.py diff --git a/conf/tflite-micro.version b/conf/tflite-micro.version index 9874f722c..ceadc0473 100644 --- a/conf/tflite-micro.version +++ b/conf/tflite-micro.version @@ -1 +1 @@ -8746ec9 +a4dc639 diff --git a/third_party/tflite-micro/tensorflow/lite/c/builtin_op_data.h b/third_party/tflite-micro/tensorflow/lite/c/builtin_op_data.h index b1981b3c5..7628e5ad1 100644 --- a/third_party/tflite-micro/tensorflow/lite/c/builtin_op_data.h +++ b/third_party/tflite-micro/tensorflow/lite/c/builtin_op_data.h @@ -1,4 +1,4 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -15,8 +15,6 @@ limitations under the License. #ifndef TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_ #define TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_ -/// For documentation, see -/// third_party/tensorflow/lite/core/c/builtin_op_data.h. -#include "tensorflow/lite/core/c/builtin_op_data.h" // IWYU pragma: export +#include "tensorflow/lite/core/c/builtin_op_data.h" #endif // TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_ diff --git a/third_party/tflite-micro/tensorflow/lite/c/c_api_types.h b/third_party/tflite-micro/tensorflow/lite/c/c_api_types.h index 18bccde66..cdbf1fd32 100644 --- a/third_party/tflite-micro/tensorflow/lite/c/c_api_types.h +++ b/third_party/tflite-micro/tensorflow/lite/c/c_api_types.h @@ -1,4 +1,4 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -12,15 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ - -// This file declares types used by the pure C inference API defined in c_api.h, -// some of which are also used in the C++ and C kernel and interpreter APIs. - #ifndef TENSORFLOW_LITE_C_C_API_TYPES_H_ #define TENSORFLOW_LITE_C_C_API_TYPES_H_ -/// For documentation, see -/// third_party/tensorflow/lite/core/c/c_api_types.h. -#include "tensorflow/lite/core/c/c_api_types.h" // IWYU pragma: export +#include "tensorflow/lite/core/c/c_api_types.h" #endif // TENSORFLOW_LITE_C_C_API_TYPES_H_ diff --git a/third_party/tflite-micro/tensorflow/lite/c/common.h b/third_party/tflite-micro/tensorflow/lite/c/common.h index 718650df8..e3e8001cb 100644 --- a/third_party/tflite-micro/tensorflow/lite/c/common.h +++ b/third_party/tflite-micro/tensorflow/lite/c/common.h @@ -36,8 +36,6 @@ limitations under the License. #ifndef TENSORFLOW_LITE_C_COMMON_H_ #define TENSORFLOW_LITE_C_COMMON_H_ -/// For documentation, see -/// third_party/tensorflow/lite/core/c/common.h. -#include "tensorflow/lite/core/c/common.h" // IWYU pragma: export +#include "tensorflow/lite/core/c/common.h" #endif // TENSORFLOW_LITE_C_COMMON_H_ diff --git a/third_party/tflite-micro/tensorflow/lite/core/c/common.cc b/third_party/tflite-micro/tensorflow/lite/core/c/common.cc index 827312b45..00bbcde28 100644 --- a/third_party/tflite-micro/tensorflow/lite/core/c/common.cc +++ b/third_party/tflite-micro/tensorflow/lite/core/c/common.cc @@ -219,11 +219,11 @@ TfLiteStatus TfLiteTensorCopy(const TfLiteTensor* src, TfLiteTensor* dst) { return kTfLiteOk; } -void TfLiteTensorResizeMaybeCopy(size_t num_bytes, TfLiteTensor* tensor, - bool preserve_data) { +TfLiteStatus TfLiteTensorResizeMaybeCopy(size_t num_bytes, TfLiteTensor* tensor, + bool preserve_data) { if (tensor->allocation_type != kTfLiteDynamic && tensor->allocation_type != kTfLitePersistentRo) { - return; + return kTfLiteOk; } #ifdef TF_LITE_TENSORFLOW_PROFILER tflite::PauseHeapMonitoring(/*pause=*/true); @@ -258,9 +258,15 @@ void TfLiteTensorResizeMaybeCopy(size_t num_bytes, TfLiteTensor* tensor, tflite::PauseHeapMonitoring(/*pause=*/false); #endif tensor->bytes = num_bytes; + if (tensor->data.data == nullptr && num_bytes != 0) { + // We are done allocating but tensor is pointing to null and a valid size + // was requested, so we error. 
+ return kTfLiteError; + } + return kTfLiteOk; } -void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor) { +TfLiteStatus TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor) { return TfLiteTensorResizeMaybeCopy(num_bytes, tensor, true); } #endif // TF_LITE_STATIC_MEMORY @@ -331,4 +337,18 @@ void TfLiteOpaqueDelegateDelete(TfLiteOpaqueDelegate* opaque_delegate) { delete tflite_delegate; } +void* TfLiteOpaqueDelegateGetData(const TfLiteOpaqueDelegate* delegate) { + if (!delegate) return nullptr; + + // The following cast is safe only because this code is part of the + // TF Lite runtime implementation. Apps using TF Lite should not rely on + // 'TfLiteOpaqueDelegate' and 'TfLiteDelegate' being equivalent. + const auto* tflite_delegate = + reinterpret_cast(delegate); + + if (!tflite_delegate->opaque_delegate_builder) return tflite_delegate->data_; + + return tflite_delegate->opaque_delegate_builder->data; +} + } // extern "C" diff --git a/third_party/tflite-micro/tensorflow/lite/core/c/common.h b/third_party/tflite-micro/tensorflow/lite/core/c/common.h index 46d5e650a..36bb01a96 100644 --- a/third_party/tflite-micro/tensorflow/lite/core/c/common.h +++ b/third_party/tflite-micro/tensorflow/lite/core/c/common.h @@ -42,6 +42,7 @@ limitations under the License. #ifndef TENSORFLOW_LITE_CORE_C_COMMON_H_ #define TENSORFLOW_LITE_CORE_C_COMMON_H_ +#include #include #include #include @@ -648,23 +649,26 @@ void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims, TfLiteStatus TfLiteTensorCopy(const TfLiteTensor* src, TfLiteTensor* dst); // Change the size of the memory block owned by `tensor` to `num_bytes`. -// Tensors with allocation types other than kTfLiteDynamic will be ignored. +// Tensors with allocation types other than `kTfLiteDynamic` will be ignored and +// a kTfLiteOk will be returned. // `tensor`'s internal data buffer will be assigned a pointer // which can safely be passed to free or realloc if `num_bytes` is zero. -// Behaviour is undefined if `tensor` is NULL. // If `preserve_data` is true, tensor data will be unchanged in the range from -// the start of the region up to the minimum of the old and new sizes. -void TfLiteTensorResizeMaybeCopy(size_t num_bytes, TfLiteTensor* tensor, - bool preserve_data); +// the start of the region up to the minimum of the old and new sizes. In the +// case of NULL tensor, or an error allocating new memory, returns +// `kTfLiteError`. +TfLiteStatus TfLiteTensorResizeMaybeCopy(size_t num_bytes, TfLiteTensor* tensor, + bool preserve_data); // Change the size of the memory block owned by `tensor` to `num_bytes`. -// Tensors with allocation types other than kTfLiteDynamic will be ignored. +// Tensors with allocation types other than kTfLiteDynamic will be ignored and +// a kTfLiteOk will be returned. // `tensor`'s internal data buffer will be assigned a pointer // which can safely be passed to free or realloc if `num_bytes` is zero. -// Behaviour is undefined if `tensor` is NULL. // Tensor data will be unchanged in the range from the start of the region up to -// the minimum of the old and new sizes. -void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor); +// the minimum of the old and new sizes. In the case +// of NULL tensor, or an error allocating new memory, returns `kTfLiteError`. +TfLiteStatus TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor); #endif // TF_LITE_STATIC_MEMORY // WARNING: This is an experimental interface that is subject to change. 
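Since TfLiteTensorResizeMaybeCopy and TfLiteTensorRealloc now return a TfLiteStatus instead of failing silently, callers can propagate allocation failures. A minimal sketch of a hypothetical caller (the helper name and error handling are illustrative, not part of the patch; these functions are only available when TF_LITE_STATIC_MEMORY is not defined):

#include "tensorflow/lite/core/c/common.h"

// Hypothetical helper: grow a kTfLiteDynamic tensor and surface an
// out-of-memory condition to the caller instead of continuing with
// tensor->data.data == nullptr.
TfLiteStatus GrowDynamicTensor(TfLiteTensor* tensor, size_t num_bytes) {
  // Non-dynamic allocation types are left untouched and report kTfLiteOk.
  TfLiteStatus status =
      TfLiteTensorResizeMaybeCopy(num_bytes, tensor, /*preserve_data=*/true);
  if (status != kTfLiteOk) {
    // Allocation failed: num_bytes was nonzero but the buffer is null.
    return status;
  }
  return kTfLiteOk;
}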
@@ -1135,6 +1139,20 @@ TfLiteOpaqueDelegate* TfLiteOpaqueDelegateCreate( // 'delegate' is a null pointer. void TfLiteOpaqueDelegateDelete(TfLiteOpaqueDelegate* delegate); +// Returns a pointer to the data associated with the provided opaque 'delegate'. +// +// A null pointer will be returned when: +// - The 'delegate' is null. +// - The 'data' field of the 'TfLiteOpaqueDelegateBuilder' used to construct the +// 'delegate' was null. +// - Or in case of any other error. +// - The 'delegate' has been constructed via a 'TfLiteOpaqueDelegateBuilder', +// but the 'data' field of the 'TfLiteOpaqueDelegateBuilder' is null. +// +// The data_ field of 'delegate' will be returned if the +// 'opaque_delegate_builder' field is null. +void* TfLiteOpaqueDelegateGetData(const TfLiteOpaqueDelegate* delegate); + #ifdef __cplusplus } // extern "C" #endif // __cplusplus diff --git a/third_party/tflite-micro/tensorflow/lite/kernels/internal/common.h b/third_party/tflite-micro/tensorflow/lite/kernels/internal/common.h index c641bc94c..00fe01f7a 100644 --- a/third_party/tflite-micro/tensorflow/lite/kernels/internal/common.h +++ b/third_party/tflite-micro/tensorflow/lite/kernels/internal/common.h @@ -328,14 +328,16 @@ template int CountLeadingZeros(T integer_input) { static_assert(std::is_unsigned::value, "Only unsigned integer types handled."); -#if defined(__GNUC__) - return integer_input ? __builtin_clz(integer_input) - : std::numeric_limits::digits; -#else if (integer_input == 0) { return std::numeric_limits::digits; } - +#if defined(__GNUC__) + if (std::is_same::value) { + return __builtin_clz(integer_input); + } else if (std::is_same::value) { + return __builtin_clzll(integer_input); + } +#endif const T one_in_leading_positive = static_cast(1) << (std::numeric_limits::digits - 1); int leading_zeros = 0; @@ -344,7 +346,6 @@ int CountLeadingZeros(T integer_input) { ++leading_zeros; } return leading_zeros; -#endif } template @@ -377,40 +378,49 @@ inline Integer FloorLog2(Integer n) { } } -// The size of the LUT depends on the type of input. For uint8 and int8 inputs -// we use a 256 entries LUT to map all the values in the (u)int8 range. For -// int16 inputs the high 9 bits are used for indexing and the 7 remaining bits -// are used for interpolation. We thus use a 513-entries LUT for int16 cases, -// 512 for the 9-bit indexing and 1 extra entry to interpolate the last value. -template -constexpr int LUTSize() { - static_assert(std::is_same::value || - std::is_same::value || - std::is_same::value, - "Only LUTs with uint8, int8 or int16 inputs are supported."); - // As per c++11: constexpr methods cannot have more than one return statement. - return (std::is_same::value || std::is_same::value) - ? 256 - : 513; +namespace detail { + +// LUTPopulate takes an optional type-erased transform_params to allow passing +// extra parameters to the transform function pointer. 
const void* is used +// instead of std::function to be compatible with TFLite Micro +template +inline typename std::enable_if::value, + FloatT>::type +LUTTransform(Func transform, const void* /*transform_params*/, FloatT value) { + static_assert(std::is_floating_point::value, + "FloatT must be a floating-point type."); + return transform(value); +} + +template +inline typename std::enable_if< + std::is_same::value, FloatT>::type +LUTTransform(Func transform, const void* transform_params, FloatT value) { + static_assert(std::is_floating_point::value, + "FloatT must be a floating-point type."); + return transform(value, transform_params); } // Use the same LUT generation code for both uint8_t and int8_t. Int8_t indexes // will be directly casted to uint8_t, the int8 LUT will thus be ordered as [0, // 1, ..., 127, -128, ..., -2, -1] instead of [-128, -127, ..., -1, 0, 1, ..., // 126, 127]. -template -inline typename std::enable_if::value || - std::is_same::value, - void>::type -LUTPopulate(float input_scale, int32_t input_zero_point, float output_scale, - int32_t output_zero_point, float (*transform)(float), T* lut) { +template +inline void LUTPopulateInt8(float input_scale, int32_t input_zero_point, + float output_scale, int32_t output_zero_point, + Func transform, const void* transform_params, + T* lut) { + static_assert( + std::is_same::value || std::is_same::value, + "T must be an uint8 or int8 type."); uint8_t* lut_uint8 = reinterpret_cast(lut); const float inverse_scale = 1 / output_scale; int32_t maxval = std::numeric_limits::max(); int32_t minval = std::numeric_limits::min(); for (int32_t val = minval; val <= maxval; ++val) { const float dequantized = input_scale * (val - input_zero_point); - const float transformed = transform(dequantized); + const float transformed = + LUTTransform(transform, transform_params, dequantized); const float rescaled = TfLiteRound(transformed * inverse_scale); const int32_t quantized = static_cast(rescaled + output_zero_point); @@ -421,10 +431,11 @@ LUTPopulate(float input_scale, int32_t input_zero_point, float output_scale, // Keep floating-point type configurable for backward compatibility. float // should be used for FloatT by default. 
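The LUT helpers in this hunk gain an optional type-erased transform_params pointer, threaded through detail::LUTTransform, so a table can be generated from a parameterized transform without std::function. A minimal sketch, assuming a made-up leaky-ReLU transform and made-up quantization parameters (only tflite::LUTPopulate itself comes from this header):

#include <cstdint>
#include "tensorflow/lite/kernels/internal/common.h"

namespace {
// Hypothetical parameter block passed through transform_params.
struct AlphaParam {
  float alpha;
};

float LeakyReluTransform(float x, const void* params) {
  const auto* p = static_cast<const AlphaParam*>(params);
  return x >= 0.0f ? x : p->alpha * x;
}
}  // namespace

void BuildLeakyReluLut(int8_t* lut /* 256 entries, see LUTSize<int8_t>() */) {
  AlphaParam param{0.1f};
  // Example scales and zero points; the transform sees dequantized floats.
  tflite::LUTPopulate<int8_t>(/*input_scale=*/0.05f, /*input_zero_point=*/0,
                              /*output_scale=*/0.05f, /*output_zero_point=*/0,
                              LeakyReluTransform, &param, lut);
}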
-template -inline typename std::enable_if::value, void>::type -LUTPopulate(FloatT input_scale, int32_t input_zero_point, FloatT output_scale, - int32_t output_zero_point, FloatT (*transform)(FloatT), T* lut) { +template +inline void LUTPopulateInt16(FloatT input_scale, int32_t input_zero_point, + FloatT output_scale, int32_t output_zero_point, + Func transform, const void* transform_params, + int16_t* lut) { static_assert(std::is_floating_point::value, "FloatT must be a floating-point type."); const FloatT input_min = @@ -440,16 +451,21 @@ LUTPopulate(FloatT input_scale, int32_t input_zero_point, FloatT output_scale, const FloatT step = (input_max - input_min) / nb_steps; const FloatT half_step = step / 2; const FloatT output_scaling_inv = - static_cast(std::numeric_limits::max() - - std::numeric_limits::min() + 1) / + static_cast(std::numeric_limits::max() - + std::numeric_limits::min() + 1) / (output_max - output_min); - const FloatT table_min = static_cast(std::numeric_limits::min()); - const FloatT table_max = static_cast(std::numeric_limits::max()); + const FloatT table_min = + static_cast(std::numeric_limits::min()); + const FloatT table_max = + static_cast(std::numeric_limits::max()); for (int i = 0; i < nb_steps; i++) { - const FloatT val = transform(input_min + i * step); - const FloatT val_midpoint = transform(input_min + i * step + half_step); - const FloatT val_next = transform(input_min + (i + 1) * step); + const FloatT val = + LUTTransform(transform, transform_params, input_min + i * step); + const FloatT val_midpoint = LUTTransform( + transform, transform_params, input_min + i * step + half_step); + const FloatT val_next = LUTTransform(transform, transform_params, + input_min + (i + 1) * step); const FloatT sample_val = TfLiteRound(val * output_scaling_inv); const FloatT midpoint_interp_val = @@ -460,54 +476,84 @@ LUTPopulate(FloatT input_scale, int32_t input_zero_point, FloatT output_scale, const FloatT midpoint_err = midpoint_interp_val - midpoint_val; const FloatT bias = TfLiteRound(midpoint_err / 2); - lut[i] = static_cast(std::min( + lut[i] = static_cast(std::min( std::max(sample_val - bias, table_min), table_max)); } - lut[nb_steps] = static_cast(std::min( - std::max(TfLiteRound(transform(input_max) * output_scaling_inv), + lut[nb_steps] = static_cast(std::min( + std::max(TfLiteRound(LUTTransform( + transform, transform_params, input_max) * + output_scaling_inv), table_min), table_max)); } +} // namespace detail + +template +inline typename std::enable_if::value || + std::is_same::value, + void>::type +LUTPopulate(float input_scale, int32_t input_zero_point, float output_scale, + int32_t output_zero_point, float (*transform)(float), T* lut) { + detail::LUTPopulateInt8(input_scale, input_zero_point, output_scale, + output_zero_point, transform, nullptr, lut); +} + +template +inline typename std::enable_if::value || + std::is_same::value, + void>::type +LUTPopulate(float input_scale, int32_t input_zero_point, float output_scale, + int32_t output_zero_point, float (*transform)(float, const void*), + const void* transform_params, T* lut) { + detail::LUTPopulateInt8(input_scale, input_zero_point, output_scale, + output_zero_point, transform, transform_params, lut); +} + template inline typename std::enable_if::value, void>::type LUTPopulate(float input_scale, int32_t input_zero_point, float output_scale, int32_t output_zero_point, float (*transform)(float), T* lut) { - LUTPopulate(input_scale, input_zero_point, output_scale, - output_zero_point, transform, lut); + 
detail::LUTPopulateInt16(input_scale, input_zero_point, output_scale, + output_zero_point, transform, nullptr, lut); +} + +template +inline typename std::enable_if::value, void>::type +LUTPopulate(float input_scale, int32_t input_zero_point, float output_scale, + int32_t output_zero_point, float (*transform)(float, const void*), + const void* transform_params, T* lut) { + detail::LUTPopulateInt16(input_scale, input_zero_point, output_scale, + output_zero_point, transform, + transform_params, lut); } -// Deprecated and will be removed in future, please use LUTPopulate instead -template -inline void gen_lut(FloatT (*func)(FloatT), FloatT input_min, FloatT input_max, - FloatT output_min, FloatT output_max, LutOutT* lut) { - static_assert(std::is_same::value, - "Input and output type of the LUT must be the same."); - static_assert(std::is_same::value, - "Only int16_t type LUT are supported."); - static_assert(std::is_same::value, - "Only float type is supported for FloatT."); - using T = LutInT; - - const auto zero_point = [](float min, float max, float scale) { - // Symmetric int16 LUT, we know the zero-point will not overflow an int32_t - // and zero-point from min will be the same as from max. - return static_cast( - static_cast(std::numeric_limits::min()) - min / scale); - }; - - const float scale = static_cast(std::numeric_limits::max() - - std::numeric_limits::min()); - const float input_scale = (input_max - input_min) / scale; - const FloatT output_scale = (output_max - output_min) / scale; - const int32_t input_zero_point = - zero_point(input_min, input_max, input_scale); - const int32_t output_zero_point = - zero_point(output_min, output_max, output_scale); - - return LUTPopulate(input_scale, input_zero_point, output_scale, - output_zero_point, func, lut); +// Deprecated, avoid usage and prefer the float version. Kept for +// backward-compatiblity. +template +inline typename std::enable_if::value, void>::type +LUTPopulate(double input_scale, int32_t input_zero_point, double output_scale, + int32_t output_zero_point, double (*transform)(double), T* lut) { + detail::LUTPopulateInt16(input_scale, input_zero_point, output_scale, + output_zero_point, transform, nullptr, lut); +} + +// The size of the LUT depends on the type of input. For uint8 and int8 inputs a +// simple 256 entries LUT is used. For int16 inputs the high 9 bits are used for +// indexing and the 7 remaining bits are used for interpolation. We thus use a +// 513-entries LUT for int16 cases, 512 for the 9-bit indexing and 1 extra entry +// to interpolate the last value. +template +constexpr int LUTSize() { + static_assert(std::is_same::value || + std::is_same::value || + std::is_same::value, + "Only LUTs with uint8, int8 or int16 inputs are supported."); + // As per c++11: constexpr methods cannot have more than one return statement. + return (std::is_same::value || std::is_same::value) + ? 
256 + : 513; } // int16_t -> int16_t table lookup with interpolation diff --git a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/add.h b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/add.h index 8d9b318cc..ec2969b46 100644 --- a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/add.h +++ b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/add.h @@ -35,24 +35,25 @@ inline void CheckArithmeticParams(const ArithmeticParams& params) { TFLITE_DCHECK_LE(-params.input2_offset, std::numeric_limits::max()); } -inline void ElementWise( - int size, const ArithmeticParams& params, const int8_t* input1_data, - const int8_t* input2_data, int8_t* output_data, - void (*check_arithmetic_params)(const ArithmeticParams&), - int8_t (*binary_func)(int8_t, int8_t, const ArithmeticParams&)) { +// TODO(b/270589088): move to a more appropriate file (b/270589088#comment2) +template +void ElementWise(int size, const ArithmeticParams& params, const T* input1_data, + const T* input2_data, T* output_data, + void (*check_arithmetic_params)(const ArithmeticParams&), + T (*binary_func)(T, T, const ArithmeticParams&)) { CheckArithmeticParams(params); for (int i = 0; i < size; ++i) { output_data[i] = binary_func(input1_data[i], input2_data[i], params); } } - -inline void BroadcastBinaryFunction4DSlow( +// TODO(b/270589088): move to a more appropriate file. (b/270589088#comment2) +template +void BroadcastBinaryFunction4DSlow( const ArithmeticParams& params, const RuntimeShape& input1_shape, - const int8_t* input1_data, const RuntimeShape& input2_shape, - const int8_t* input2_data, const RuntimeShape& output_shape, - int8_t* output_data, + const T* input1_data, const RuntimeShape& input2_shape, + const T* input2_data, const RuntimeShape& output_shape, T* output_data, void (*check_arithmetic_params)(const ArithmeticParams&), - int8_t (*binary_func)(int8_t, int8_t, const ArithmeticParams&)) { + T (*binary_func)(T, T, const ArithmeticParams&)) { NdArrayDesc<4> desc1; NdArrayDesc<4> desc2; NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, diff --git a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h index 5ddf04aea..ba3e2a81d 100644 --- a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h +++ b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h @@ -18,7 +18,6 @@ limitations under the License. 
#include #include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" namespace tflite { namespace reference_integer_ops { @@ -134,20 +133,6 @@ inline void ConvPerChannel( } } -inline void ConvPerChannelWithPackedInt4Weights( - const ConvParams& params, const int32_t* output_multiplier, - const int32_t* output_shift, const RuntimeShape& input_shape, - const int8_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_input, int8_t* unpacked_filter_data, - const RuntimeShape& bias_shape, const int32_t* bias_data, - const RuntimeShape& output_shape, int8_t* output_data) { - TFLITE_DCHECK(unpacked_filter_data != nullptr); - tflite::tensor_utils::UnpackDenseInt4IntoInt8( - filter_input, filter_shape.FlatSize(), unpacked_filter_data); - ConvPerChannel(params, output_multiplier, output_shift, input_shape, - input_data, filter_shape, unpacked_filter_data, bias_shape, - bias_data, output_shape, output_data); -} // Fixed-point per-channel-quantization convolution reference kernel. // 16-bit data and 8-bit filter diff --git a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h index 312ba0f93..7676fce0f 100644 --- a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h +++ b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h @@ -18,7 +18,6 @@ limitations under the License. #include #include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" namespace tflite { namespace reference_integer_ops { @@ -122,21 +121,6 @@ inline void DepthwiseConvPerChannel( } } -inline void DepthwiseConvPerChannelWithPackedInt4Weights( - const DepthwiseParams& params, const int32_t* output_multiplier, - const int32_t* output_shift, const RuntimeShape& input_shape, - const int8_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, int8_t* unpacked_filter_data, - const RuntimeShape& bias_shape, const int32_t* bias_data, - const RuntimeShape& output_shape, int8_t* output_data) { - TFLITE_DCHECK_NE(unpacked_filter_data, nullptr); - tflite::tensor_utils::UnpackDenseInt4IntoInt8( - filter_data, filter_shape.FlatSize(), unpacked_filter_data); - DepthwiseConvPerChannel(params, output_multiplier, output_shift, input_shape, - input_data, filter_shape, unpacked_filter_data, - bias_shape, bias_data, output_shape, output_data); -} - inline void DepthwiseConvPerChannel( const DepthwiseParams& params, const int32_t* output_multiplier, const int32_t* output_shift, const RuntimeShape& input_shape, diff --git a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h index 77c766d25..3a74402ed 100644 --- a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h +++ b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h @@ -18,7 +18,6 @@ limitations under the License. 
#include #include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" namespace tflite { namespace reference_integer_ops { @@ -29,14 +28,15 @@ namespace reference_integer_ops { // zero_point (params.weights_offset) is always 0. // However, for per-tensor functions, params.weights_offset is still applied for // backward compatibility. - -inline void FullyConnectedPerChannel( +template +void FullyConnectedPerChannel( const FullyConnectedParams& params, const int32_t* output_multiplier, const int* output_shift, const RuntimeShape& input_shape, - const int8_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, const RuntimeShape& bias_shape, - const int32_t* bias_data, const RuntimeShape& output_shape, - int8_t* output_data) { + const InputType* input_data, const RuntimeShape& filter_shape, + const WeightType* filter_data, const RuntimeShape& bias_shape, + const BiasType* bias_data, const RuntimeShape& output_shape, + OutputType* output_data) { const int32_t input_offset = params.input_offset; const int32_t output_offset = params.output_offset; const int32_t output_activation_min = params.quantized_activation_min; @@ -52,7 +52,7 @@ inline void FullyConnectedPerChannel( const int accum_depth = filter_shape.Dims(filter_dim_count - 1); for (int b = 0; b < batches; ++b) { for (int out_c = 0; out_c < output_depth; ++out_c) { - int32_t acc = 0; + BiasType acc = 0; for (int d = 0; d < accum_depth; ++d) { int32_t input_val = input_data[b * accum_depth + d]; int32_t filter_val = filter_data[out_c * accum_depth + d]; @@ -61,62 +61,26 @@ inline void FullyConnectedPerChannel( if (bias_data) { acc += bias_data[out_c]; } - acc = MultiplyByQuantizedMultiplier(acc, output_multiplier[out_c], - output_shift[out_c]); - acc += output_offset; - acc = std::max(acc, output_activation_min); - acc = std::min(acc, output_activation_max); - output_data[out_c + output_depth * b] = static_cast(acc); - } - } -} - -template -inline void FullyConnectedPerChannel( - const FullyConnectedParams& params, const int32_t* output_multiplier, - const int* output_shift, const RuntimeShape& input_shape, - const int16_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, const RuntimeShape& bias_shape, - const AccumScalar* bias_data, const RuntimeShape& output_shape, - int16_t* output_data) { - const int32_t output_activation_min = params.quantized_activation_min; - const int32_t output_activation_max = params.quantized_activation_max; - TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2); - TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1); - - TFLITE_DCHECK_LE(output_activation_min, output_activation_max); - const int filter_dim_count = filter_shape.DimensionsCount(); - const int output_dim_count = output_shape.DimensionsCount(); - const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1); - const int output_depth = output_shape.Dims(output_dim_count - 1); - TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2)); - const int accum_depth = filter_shape.Dims(filter_dim_count - 1); - for (int b = 0; b < batches; ++b) { - for (int out_c = 0; out_c < output_depth; ++out_c) { - AccumScalar acc = 0; - for (int d = 0; d < accum_depth; ++d) { - int32_t input_val = input_data[b * accum_depth + d]; - int32_t filter_val = filter_data[out_c * accum_depth + d]; - acc += filter_val * input_val; - } - if (bias_data) { - acc += bias_data[out_c]; - } int32_t acc_scaled = MultiplyByQuantizedMultiplier( acc, 
output_multiplier[out_c], output_shift[out_c]); + acc_scaled += output_offset; acc_scaled = std::max(acc_scaled, output_activation_min); acc_scaled = std::min(acc_scaled, output_activation_max); - output_data[out_c + output_depth * b] = static_cast(acc_scaled); + output_data[out_c + output_depth * b] = + static_cast(acc_scaled); } } } -inline void FullyConnected( - const FullyConnectedParams& params, const RuntimeShape& input_shape, - const int8_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, const RuntimeShape& bias_shape, - const int32_t* bias_data, const RuntimeShape& output_shape, - int8_t* output_data) { +template +void FullyConnected(const FullyConnectedParams& params, + const RuntimeShape& input_shape, + const InputType* input_data, + const RuntimeShape& filter_shape, + const WeightType* filter_data, + const RuntimeShape& bias_shape, const BiasType* bias_data, + const RuntimeShape& output_shape, OutputType* output_data) { const int32_t input_offset = params.input_offset; const int32_t filter_offset = params.weights_offset; const int32_t output_offset = params.output_offset; @@ -136,7 +100,7 @@ inline void FullyConnected( const int accum_depth = filter_shape.Dims(filter_dim_count - 1); for (int b = 0; b < batches; ++b) { for (int out_c = 0; out_c < output_depth; ++out_c) { - int32_t acc = 0; + BiasType acc = 0; for (int d = 0; d < accum_depth; ++d) { int32_t input_val = input_data[b * accum_depth + d]; int32_t filter_val = filter_data[out_c * accum_depth + d]; @@ -145,67 +109,13 @@ inline void FullyConnected( if (bias_data) { acc += bias_data[out_c]; } - acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); - acc += output_offset; - acc = std::max(acc, output_activation_min); - acc = std::min(acc, output_activation_max); - output_data[out_c + output_depth * b] = static_cast(acc); - } - } -} - -inline void FullyConnectedWithPackedInt4Weights( - const FullyConnectedParams& params, const RuntimeShape& input_shape, - const int8_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, int8_t* unpacked_filter_data, - const RuntimeShape& bias_shape, const int32_t* bias_data, - const RuntimeShape& output_shape, int8_t* output_data) { - TFLITE_DCHECK_NE(unpacked_filter_data, nullptr); - tflite::tensor_utils::UnpackDenseInt4IntoInt8( - filter_data, filter_shape.FlatSize(), unpacked_filter_data); - FullyConnected(params, input_shape, input_data, filter_shape, - unpacked_filter_data, bias_shape, bias_data, output_shape, - output_data); -} - -template -inline void FullyConnected( - const FullyConnectedParams& params, const RuntimeShape& input_shape, - const int16_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, const RuntimeShape& bias_shape, - const AccumScalar* bias_data, const RuntimeShape& output_shape, - int16_t* output_data) { - const int32_t filter_offset = params.weights_offset; - const int32_t output_multiplier = params.output_multiplier; - const int output_shift = params.output_shift; - const int32_t output_activation_min = params.quantized_activation_min; - const int32_t output_activation_max = params.quantized_activation_max; - TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2); - TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1); - - TFLITE_DCHECK_LE(output_activation_min, output_activation_max); - const int filter_dim_count = filter_shape.DimensionsCount(); - const int output_dim_count = output_shape.DimensionsCount(); - const int batches = FlatSizeSkipDim(output_shape, 
output_dim_count - 1); - const int output_depth = output_shape.Dims(output_dim_count - 1); - TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2)); - const int accum_depth = filter_shape.Dims(filter_dim_count - 1); - for (int b = 0; b < batches; ++b) { - for (int out_c = 0; out_c < output_depth; ++out_c) { - AccumScalar acc = 0; - for (int d = 0; d < accum_depth; ++d) { - int32_t input_val = input_data[b * accum_depth + d]; - int32_t filter_val = filter_data[out_c * accum_depth + d]; - acc += (filter_val + filter_offset) * input_val; - } - if (bias_data) { - acc += bias_data[out_c]; - } int32_t acc_scaled = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); + acc_scaled += output_offset; acc_scaled = std::max(acc_scaled, output_activation_min); acc_scaled = std::min(acc_scaled, output_activation_max); - output_data[out_c + output_depth * b] = static_cast(acc_scaled); + output_data[out_c + output_depth * b] = + static_cast(acc_scaled); } } } diff --git a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h index 09d37b726..7e3f690e9 100644 --- a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h +++ b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h @@ -1,10 +1,10 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -15,65 +15,4 @@ limitations under the License. #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_ #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_ -#include - -#include "tensorflow/lite/kernels/internal/common.h" - -namespace tflite { -namespace reference_integer_ops { - -template -inline void Mean(const tflite::MeanParams& op_params, int32_t multiplier, - int32_t shift, const RuntimeShape& unextended_input_shape, - const integer_type* input_data, int32_t input_zero_point, - const RuntimeShape& unextended_output_shape, - integer_type* output_data, int32_t output_zero_point) { - // Current implementation only supports dimension equals 4 and simultaneous - // reduction over width and height. 
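Returning to the fully_connected.h change above: the previously separate int8 and int16 reference kernels are now a single template over input, weight, bias, and output types, with the bias type doubling as the accumulator. A hypothetical 16x8 instantiation is sketched below; the shapes and quantization parameters are invented for illustration only.

#include <cstdint>
#include <limits>
#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
#include "tensorflow/lite/kernels/internal/types.h"

// Hypothetical wrapper: 1 batch, 32 input features, 16 output units.
void FullyConnected16x8(const int16_t* input, const int8_t* weights,
                        const int64_t* bias, int16_t* output) {
  tflite::FullyConnectedParams params{};
  params.input_offset = 0;    // int16 activations are symmetrically quantized
  params.weights_offset = 0;  // per-tensor symmetric weights
  params.output_offset = 0;
  params.output_multiplier = 1073741824;  // example requantization multiplier
  params.output_shift = -7;               // example requantization shift
  params.quantized_activation_min = std::numeric_limits<int16_t>::min();
  params.quantized_activation_max = std::numeric_limits<int16_t>::max();

  const tflite::RuntimeShape input_shape({1, 32});
  const tflite::RuntimeShape filter_shape({16, 32});
  const tflite::RuntimeShape bias_shape({16});
  const tflite::RuntimeShape output_shape({1, 16});

  // BiasType (int64_t here) is also used as the accumulator type.
  tflite::reference_integer_ops::FullyConnected(
      params, input_shape, input, filter_shape, weights, bias_shape, bias,
      output_shape, output);
}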
- TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4); - TFLITE_CHECK_LE(unextended_output_shape.DimensionsCount(), 4); - const RuntimeShape input_shape = - RuntimeShape::ExtendedShape(4, unextended_input_shape); - const RuntimeShape output_shape = - RuntimeShape::ExtendedShape(4, unextended_output_shape); - const int output_batch = output_shape.Dims(0); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - const int output_depth = output_shape.Dims(3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int num_elements_in_axis = input_width * input_height; - - TFLITE_CHECK_EQ(op_params.axis_count, 2); - TFLITE_CHECK((op_params.axis[0] == 1 && op_params.axis[1] == 2) || - (op_params.axis[0] == 2 && op_params.axis[1] == 1)); - TFLITE_CHECK_EQ(output_height, 1); - TFLITE_CHECK_EQ(output_width, 1); - - static constexpr int32_t kMinInt = std::numeric_limits::min(); - static constexpr int32_t kMaxInt = std::numeric_limits::max(); - - for (int out_b = 0; out_b < output_batch; ++out_b) { - for (int out_d = 0; out_d < output_depth; ++out_d) { - int32_t acc = 0; - for (int in_h = 0; in_h < input_height; ++in_h) { - for (int in_w = 0; in_w < input_width; ++in_w) { - acc += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)] - - input_zero_point; - } - } - acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift); - acc = acc > 0 ? (acc + num_elements_in_axis / 2) / num_elements_in_axis - : (acc - num_elements_in_axis / 2) / num_elements_in_axis; - acc += output_zero_point; - acc = std::min(std::max(acc, kMinInt), kMaxInt); - output_data[Offset(output_shape, out_b, 0, 0, out_d)] = - static_cast(acc); - } - } -} - -} // namespace reference_integer_ops -} // namespace tflite - #endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_ diff --git a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h index 22e897409..05066184c 100644 --- a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h +++ b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h @@ -24,10 +24,10 @@ limitations under the License. 
namespace tflite { namespace reference_integer_ops { -template -inline void MulElementwise(int size, const ArithmeticParams& params, - const T* input1_data, const T* input2_data, - T* output_data) { +template +void MulElementwise(int size, const ArithmeticParams& params, + const InputType* input1_data, const InputType* input2_data, + OutputType* output_data) { for (int i = 0; i < size; ++i) { const int32_t input1_val = params.input1_offset + input1_data[i]; const int32_t input2_val = params.input2_offset + input2_data[i]; @@ -39,7 +39,7 @@ inline void MulElementwise(int size, const ArithmeticParams& params, const int32_t clamped_output = std::min(params.quantized_activation_max, std::max(params.quantized_activation_min, unclamped_result)); - output_data[i] = static_cast(clamped_output); + output_data[i] = static_cast(clamped_output); } } diff --git a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/reduce.h b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/reduce.h index adc435f90..5b795ea8f 100644 --- a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/reduce.h +++ b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/reduce.h @@ -268,11 +268,11 @@ inline bool Mean(const T* input_data, const int* input_dims, return true; } -template inline void Mean(const tflite::MeanParams& op_params, const RuntimeShape& unextended_input_shape, - const T* input_data, - const RuntimeShape& unextended_output_shape, T* output_data) { + const float* input_data, + const RuntimeShape& unextended_output_shape, + float* output_data) { ruy::profiler::ScopeLabel label("Mean4D"); // Current implementation only supports dimension equals 4 and simultaneous @@ -312,78 +312,21 @@ inline void Mean(const tflite::MeanParams& op_params, } } -inline void Mean(const tflite::MeanParams& op_params, - const RuntimeShape& unextended_input_shape, - const uint8_t* input_data, int32_t input_zero_point, - float input_scale, const RuntimeShape& unextended_output_shape, - uint8_t* output_data, int32_t output_zero_point, - float output_scale) { - ruy::profiler::ScopeLabel label("Mean4D/Uint8"); - - // Current implementation only supports dimension equals 4 and simultaneous - // reduction over width and height. - TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4); - TFLITE_CHECK_LE(unextended_output_shape.DimensionsCount(), 4); - const RuntimeShape input_shape = - RuntimeShape::ExtendedShape(4, unextended_input_shape); - const RuntimeShape output_shape = - RuntimeShape::ExtendedShape(4, unextended_output_shape); - const int output_batch = output_shape.Dims(0); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - const int output_depth = output_shape.Dims(3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const float num_elements_in_axis = input_width * input_height; - - TFLITE_CHECK_EQ(op_params.axis_count, 2); - TFLITE_CHECK((op_params.axis[0] == 1 && op_params.axis[1] == 2) || - (op_params.axis[0] == 2 && op_params.axis[1] == 1)); - TFLITE_CHECK_EQ(output_height, 1); - TFLITE_CHECK_EQ(output_width, 1); - - constexpr int32_t kMinValue = std::numeric_limits::min(); - constexpr int32_t kMaxValue = std::numeric_limits::max(); - - float temp = input_zero_point * input_scale / output_scale; - temp = temp > 0 ? 
temp + 0.5f : temp - 0.5f; - int32_t bias = output_zero_point - static_cast(temp); - double real_scale = - static_cast(input_scale / (num_elements_in_axis * output_scale)); - - int32_t multiplier; - int shift; - QuantizeMultiplier(real_scale, &multiplier, &shift); - for (int out_b = 0; out_b < output_batch; ++out_b) { - for (int out_d = 0; out_d < output_depth; ++out_d) { - int32_t acc = 0; - for (int in_h = 0; in_h < input_height; ++in_h) { - for (int in_w = 0; in_w < input_width; ++in_w) { - acc += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)]; - } - } - acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift); - acc += bias; - acc = std::min(std::max(acc, kMinValue), kMaxValue); - output_data[Offset(output_shape, out_b, 0, 0, out_d)] = - static_cast(acc); - } - } -} - // Computes the mean of elements across dimensions given in axis. // It does so in two stages, first calculates the sum of elements along the axis // then divides it by the number of element in axis for quantized values. template inline bool QuantizedMeanOrSum(const T* input_data, int32_t input_zero_point, - float input_scale, const int* input_dims, - const int input_num_dims, T* output_data, - int32_t output_zero_point, float output_scale, + const int* input_dims, const int input_num_dims, + T* output_data, int32_t output_multiplier, + int output_shift, int32_t output_zero_point, const int* output_dims, const int output_num_dims, const int* axis, const int num_axis_dimensions, bool keep_dims, int* temp_index, int* resolved_axis, U* temp_sum, bool compute_sum) { + const int32_t kMinValue = std::numeric_limits::min(); + const int32_t kMaxValue = std::numeric_limits::max(); const bool uint8_case = std::is_same::value; const bool int16_case = std::is_same::value; if (uint8_case) { @@ -430,40 +373,46 @@ inline bool QuantizedMeanOrSum(const T* input_data, int32_t input_zero_point, } // Calculate mean by dividing output_data by num of aggregated element. - size_t num_elements_in_axis = 1; + int64_t num_elements_in_axis = 1; for (int idx = 0; idx < num_resolved_axis; ++idx) { size_t current = static_cast(input_dims[resolved_axis[idx]]); // Overflow prevention. - if (current > (std::numeric_limits::max() / num_elements_in_axis)) { + if (current > static_cast(std::numeric_limits::max() / + num_elements_in_axis)) { return false; } num_elements_in_axis *= current; } - if (num_elements_in_axis > 0) { - const float scale = input_scale / output_scale; - if (compute_sum) { - // TODO(b/116341117): Eliminate float and do this completely in 8bit. - const float bias = -input_zero_point * scale * num_elements_in_axis; - for (size_t idx = 0; idx < num_outputs; ++idx) { - const U value = - static_cast(TfLiteRound(temp_sum[idx] * scale + bias)) + - output_zero_point; - output_data[idx] = static_cast(value); - } - } else { - const float bias = -input_zero_point * scale; - for (size_t idx = 0; idx < num_outputs; ++idx) { - float float_mean = static_cast(temp_sum[idx]) / - static_cast(num_elements_in_axis); - float result = TfLiteMin( - TfLiteRound(float_mean * scale + bias) + output_zero_point, - static_cast(std::numeric_limits::max())); - result = TfLiteMax(result, - static_cast(std::numeric_limits::min())); - output_data[idx] = static_cast(result); - } - } + if (num_elements_in_axis == 0) { + return true; + } + + // Readapt output rescaling when calculating the mean to integrate a + // 1/num_elements_in_axis multiplier. 
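The rescaling that follows folds the 1/num_elements_in_axis division into the existing requantization multiplier, so the mean path can reuse the same MultiplyByQuantizedMultiplier call as the sum path. A standalone numeric sketch of that adjustment (multiplier and shift values are made up; __builtin_clzll stands in for CountLeadingZeros):

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
  // Example requantization parameters and a 7x7 spatial reduction.
  int32_t output_multiplier = 1518500250;  // ~0.7071 as a Q31 fixed-point value
  int output_shift = -3;
  const int64_t num_elements_in_axis = 49;

  // Shift the multiplier up as far as safely possible before dividing by N,
  // mirroring the clamping logic in QuantizedMeanOrSum above.
  int shift =
      63 - __builtin_clzll(static_cast<uint64_t>(num_elements_in_axis));
  shift = std::min(shift, 32);
  shift = std::min(shift, 31 + output_shift);
  output_multiplier = static_cast<int32_t>(
      (static_cast<int64_t>(output_multiplier) << shift) /
      num_elements_in_axis);
  output_shift -= shift;

  // Effective scale is now (0.7071 * 2^-3) / 49, roughly 0.0018, the same as
  // dividing the accumulated mean by 49 explicitly.
  printf("multiplier=%ld shift=%d\n", static_cast<long>(output_multiplier),
         output_shift);
  return 0;
}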
+ if (!compute_sum) { + TFLITE_DCHECK_GE(num_elements_in_axis, 0); + int shift = + 63 - CountLeadingZeros(static_cast(num_elements_in_axis)); + // To avoid any overflow risk 'shift' should be <= 32 and to satisfy + // 'MultiplyByQuantizedMultiplier' pre-conditions 'output_shift - shift' + // should be >= -31. Clamp the value at the price of some precision loss. + shift = std::min(shift, 32); + shift = std::min(shift, 31 + output_shift); + output_multiplier = static_cast( + (static_cast(output_multiplier) << shift) / + num_elements_in_axis); + output_shift = output_shift - shift; + } + + for (size_t idx = 0; idx < num_outputs; ++idx) { + const U shifted_sum = + static_cast(temp_sum[idx] - input_zero_point * num_elements_in_axis); + int32_t output = MultiplyByQuantizedMultiplier( + shifted_sum, output_multiplier, output_shift) + + output_zero_point; + output = std::min(std::max(output, kMinValue), kMaxValue); + output_data[idx] = static_cast(output); } return true; } @@ -478,8 +427,8 @@ inline bool QuantizedMeanOrSumExtraArgs( bool keep_dims, int* temp_index, int* resolved_axis, U* temp_sum, bool compute_sum) { return QuantizedMeanOrSum( - input_data, input_zero_point, input_scale, input_dims, input_num_dims, - output_data, output_zero_point, output_scale, output_dims, + input_data, input_zero_point, input_dims, input_num_dims, output_data, + output_multiplier, output_shift, output_zero_point, output_dims, output_num_dims, axis, num_axis_dimensions, keep_dims, temp_index, resolved_axis, temp_sum, compute_sum); } diff --git a/third_party/tflite-micro/tensorflow/lite/kernels/kernel_util.cc b/third_party/tflite-micro/tensorflow/lite/kernels/kernel_util.cc index 7b7e37362..4fb035e26 100644 --- a/third_party/tflite-micro/tensorflow/lite/kernels/kernel_util.cc +++ b/third_party/tflite-micro/tensorflow/lite/kernels/kernel_util.cc @@ -436,7 +436,7 @@ TfLiteStatus GetOutputShapeFromInput(TfLiteContext* context, // that build. What appears to be happening is that while the linker drops the // unsused function, the string library that gets pulled in is not dropped, // resulting in the increased binary size. -const std::string GetShapeDebugString(const TfLiteIntArray* shape) { +std::string GetShapeDebugString(const TfLiteIntArray* shape) { std::string str; for (int d = 0; d < shape->size; ++d) { if (str.empty()) diff --git a/third_party/tflite-micro/tensorflow/lite/kernels/kernel_util.h b/third_party/tflite-micro/tensorflow/lite/kernels/kernel_util.h index fc586b1e0..608db54ae 100644 --- a/third_party/tflite-micro/tensorflow/lite/kernels/kernel_util.h +++ b/third_party/tflite-micro/tensorflow/lite/kernels/kernel_util.h @@ -297,7 +297,7 @@ TfLiteStatus GetOutputShapeFromInput(TfLiteContext* context, const TfLiteTensor* input, TfLiteIntArray** output_shape); -const std::string GetShapeDebugString(const TfLiteIntArray* shape); +std::string GetShapeDebugString(const TfLiteIntArray* shape); #endif // !defined(TF_LITE_STATIC_MEMORY) diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/BUILD b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/BUILD index e2ed47685..de4c9ab09 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/BUILD +++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/BUILD @@ -1,5 +1,6 @@ # Description: # TensorFlow Lite for Microcontrollers "hello world" example. 
+load("@tflm_pip_deps//:requirements.bzl", "requirement") load( "//tensorflow/lite/micro:build_def.bzl", "generate_cc_arrays", @@ -7,39 +8,26 @@ load( ) package( - default_visibility = ["//visibility:public"], # Disabling layering_check because of http://b/177257332 features = ["-layering_check"], licenses = ["notice"], ) -generate_cc_arrays( - name = "generated_hello_world_model_cc", - src = "hello_world.tflite", - out = "hello_world_model_data.cc", -) - -generate_cc_arrays( - name = "generated_hello_world_model_hdr", - src = "hello_world.tflite", - out = "hello_world_model_data.h", -) - cc_library( name = "model", srcs = [ - ":generated_hello_world_model_cc", + "//tensorflow/lite/micro/examples/hello_world/models:generated_hello_world_float_model_cc", ], hdrs = [ - ":generated_hello_world_model_hdr", + "//tensorflow/lite/micro/examples/hello_world/models:generated_hello_world_float_model_hdr", ], copts = micro_copts(), ) cc_test( - name = "hello_world_test", + name = "evaluate_cc_test", srcs = [ - "hello_world_test.cc", + "evaluate_test.cc", ], deps = [ ":model", @@ -51,57 +39,41 @@ cc_test( ], ) -cc_library( - name = "output_handler", - srcs = [ - "output_handler.cc", - ], - hdrs = [ - "output_handler.h", - ], - copts = micro_copts(), +py_binary( + name = "evaluate", + srcs = ["evaluate.py"], + data = ["//tensorflow/lite/micro/examples/hello_world/models:hello_world_float.tflite"], + python_version = "PY3", + srcs_version = "PY3", deps = [ - "//tensorflow/lite/c:common", - "//tensorflow/lite/micro:micro_log", + "@absl_py//absl:app", + "@absl_py//absl/flags", + "@absl_py//absl/logging", + requirement("numpy"), + requirement("tensorflow-cpu"), + "//tensorflow/lite/micro/python/interpreter/src:tflm_runtime", ], ) -cc_library( - name = "constants", - srcs = [ - "constants.cc", +py_binary( + name = "evaluate_test", + srcs = ["evaluate_test.py"], + data = [ + "//tensorflow/lite/micro/examples/hello_world/models:hello_world_float.tflite", ], - hdrs = [ - "constants.h", + python_version = "PY3", + srcs_version = "PY3", + deps = [ + ":evaluate", ], - copts = micro_copts(), ) -cc_binary( - name = "hello_world", - srcs = [ - "main.cc", - "main_functions.cc", - "main_functions.h", - ], - copts = [ - "-Werror", - "-Wsign-compare", - ], +py_binary( + name = "train", + srcs = ["train.py"], + srcs_version = "PY3", deps = [ - ":constants", - ":model", - ":output_handler", - "//tensorflow/lite/micro:micro_framework", - "//tensorflow/lite/micro:micro_log", - "//tensorflow/lite/micro:op_resolvers", - "//tensorflow/lite/micro:system_setup", - "//tensorflow/lite/schema:schema_fbs", + requirement("numpy"), + requirement("tensorflow-cpu"), ], ) - -sh_test( - name = "hello_world_binary_test", - srcs = ["hello_world_binary_test.sh"], - data = [":hello_world"], -) diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/Makefile.inc b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/Makefile.inc index ad058e819..489bc9521 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/Makefile.inc +++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/Makefile.inc @@ -1,57 +1,34 @@ -EXAMPLE_NAME:=hello_world - HELLO_WORLD_TEST_SRCS := \ -$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/$(EXAMPLE_NAME)/$(EXAMPLE_NAME)_test.cc - -OUTPUT_HANDLER_TEST_SRCS := \ -$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/$(EXAMPLE_NAME)/output_handler_test.cc \ -$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/$(EXAMPLE_NAME)/output_handler.cc - 
-OUTPUT_HANDLER_TEST_HDRS := \ -$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/$(EXAMPLE_NAME)/output_handler.h \ -$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/$(EXAMPLE_NAME)/constants.h +$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/hello_world/evaluate_test.cc HELLO_WORLD_SRCS := \ -$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/$(EXAMPLE_NAME)/main.cc \ -$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/$(EXAMPLE_NAME)/main_functions.cc \ -$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/$(EXAMPLE_NAME)/output_handler.cc \ -$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/$(EXAMPLE_NAME)/constants.cc +$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/hello_world/evaluate_test.cc -HELLO_WORLD_HDRS := \ -$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/$(EXAMPLE_NAME)/output_handler.h \ -$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/$(EXAMPLE_NAME)/constants.h \ -$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/$(EXAMPLE_NAME)/main_functions.h +HELLO_WORLD_HDRS := HELLO_WORLD_GENERATOR_INPUTS := \ -$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/hello_world/hello_world.tflite +$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/hello_world/models/hello_world_float.tflite HELLO_WORLD_GENERATED_SRCS := \ -$(GENERATED_SRCS_DIR)$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/hello_world/hello_world_model_data.cc +$(GENERATED_SRCS_DIR)$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/hello_world/models/hello_world_float_model_data.cc HELLO_WORLD_GENERATED_HDRS := \ -$(GENERATED_SRCS_DIR)$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/hello_world/hello_world_model_data.h - -#Find any platform - specific rules for this example. -include $(wildcard $(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/$(EXAMPLE_NAME)/*/Makefile.inc) +$(GENERATED_SRCS_DIR)$(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/hello_world/models/hello_world_float_model_data.h # Tests loading and running the sine model. -$(eval $(call microlite_test,$(EXAMPLE_NAME)_test,\ +$(eval $(call microlite_test,evaluate_cc_test,\ $(HELLO_WORLD_TEST_SRCS),,$(HELLO_WORLD_GENERATOR_INPUTS))) -# Tests producing an output. -$(eval $(call microlite_test,output_handler_test,\ -$(OUTPUT_HANDLER_TEST_SRCS),$(OUTPUT_HANDLER_TEST_HDRS))) - # Builds a standalone binary. -$(eval $(call microlite_test,$(EXAMPLE_NAME),\ -$(HELLO_WORLD_SRCS),$(HELLO_WORLD_HDRS),$(HELLO_WORLD_GENERATOR_INPUTS))) +$(eval $(call microlite_test,hello_world,\ +$(HELLO_WORLD_SRCS),,$(HELLO_WORLD_GENERATOR_INPUTS))) # Add sources and headers generated from $(HELLO_WORLD_GENERATOR_INPUTS). HELLO_WORLD_SRCS += $(HELLO_WORLD_GENERATED_SRCS) HELLO_WORLD_HDRS += $(HELLO_WORLD_GENERATED_HDRS) -list_$(EXAMPLE_NAME)_example_sources: +list_hello_world_example_sources: @echo $(HELLO_WORLD_SRCS) -list_$(EXAMPLE_NAME)_example_headers: +list_hello_world_example_headers: @echo $(HELLO_WORLD_HDRS) diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/README.md b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/README.md index b5bb00ff2..e65a2f79d 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/README.md +++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/README.md @@ -10,17 +10,44 @@ microcontroller. 
## Table of contents +- [Run the evaluate.py script on a development machine](#run-the-evaluate-script-on-a-development-machine) - [Run the tests on a development machine](#run-the-tests-on-a-development-machine) - [Train your own model](#train-your-own-model) +## Run the evaluate.py script on a development machine +The evaluate.py script runs the hello_world_float.tflite model with x_values in +the range of [0, 2*PI]. The script plots a diagram of the predicted sine values +using the TFLM interpreter and compares those predictions with the actual +values generated by NumPy. +```bash +bazel build :evaluate +bazel run :evaluate +bazel run :evaluate -- --use_tflite +``` +![TFLM hello_world sine wave prediction vs. actual values](images/hello_world_tflm.png) ![TFLite hello_world sine wave prediction vs. actual values](images/hello_world_tflite.png) + +## Run the evaluate_test.py script on a development machine +These tests verify the input/output as well as the prediction of the +hello_world_float.tflite model. There is also a test that verifies the +correctness of the model by running both the TFLM and TFLite interpreters and +then comparing their predictions. +```bash +bazel build :evaluate_test +bazel run :evaluate_test +``` + ## Run the tests on a development machine +Run the C++ test using Bazel: +```bash +bazel run tensorflow/lite/micro/examples/hello_world:evaluate_cc_test +``` +And to run it using Make: ```bash -make -f tensorflow/lite/micro/tools/make/Makefile third_party_downloads -make -f tensorflow/lite/micro/tools/make/Makefile test_hello_world_test +make -f tensorflow/lite/micro/tools/make/Makefile test_evaluate_cc_test ``` -The source for the test is [hello_world_test.cc](hello_world_test.cc). +The source for the test is [evaluate_test.cc](evaluate_test.cc). It's a fairly small amount of code that creates an interpreter, gets a handle to a model that's been compiled into the program, and then invokes the interpreter with the model and sample inputs. diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/evaluate.py b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/evaluate.py new file mode 100644 index 000000000..246091ef0 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/evaluate.py @@ -0,0 +1,131 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
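+ +# Example usage (see the README in this directory; the Bazel targets are +# defined in this example's BUILD file): +#   bazel run tensorflow/lite/micro/examples/hello_world:evaluate +#   bazel run tensorflow/lite/micro/examples/hello_world:evaluate -- --use_tflite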
+ +import os +import tensorflow as tf +from absl import app +from absl import flags +import numpy as np +import matplotlib.pyplot as plt +from tensorflow.python.platform import resource_loader +from tflite_micro.tensorflow.lite.micro.python.interpreter.src import tflm_runtime + +_USE_TFLITE_INTERPRETER = flags.DEFINE_bool( + 'use_tflite', + False, + 'Inference with the TF Lite interpreter instead of the TFLM interpreter', +) + +_PREFIX_PATH = resource_loader.get_path_to_datafile('') + + +def invoke_tflm_interpreter(input_shape, interpreter, x_value, input_index, + output_index): + input_data = np.reshape(x_value, input_shape) + interpreter.set_input(input_data, input_index) + interpreter.invoke() + y_quantized = np.reshape(interpreter.get_output(output_index), -1)[0] + return y_quantized + + +def invoke_tflite_interpreter(input_shape, interpreter, x_value, input_index, + output_index): + input_data = np.reshape(x_value, input_shape) + interpreter.set_tensor(input_index, input_data) + interpreter.invoke() + tflite_output = interpreter.get_tensor(output_index) + y_quantized = np.reshape(tflite_output, -1)[0] + return y_quantized + + +# Generate a list of 1000 random floats in the range of 0 to 2*pi. +def generate_random_input(sample_count=1000): + # Generate a uniformly distributed set of random numbers in the range from + # 0 to 2Ï€, which covers a complete sine wave oscillation + x_values = np.random.uniform(low=0, high=2 * np.pi, + size=sample_count).astype(np.float32) + # Shuffle the values to guarantee they're not in order + np.random.shuffle(x_values) + return x_values + + +# Invoke the tflm interpreter with x_values in the range of [0, 2*PI] and +# returns the prediction of the interpreter. +def get_tflm_prediction(model_path, x_values): + # Create the tflm interpreter + tflm_interpreter = tflm_runtime.Interpreter.from_file(model_path) + + input_shape = np.array(tflm_interpreter.get_input_details(0).get('shape')) + + y_predictions = np.empty(x_values.size, dtype=np.float32) + + for i, x_value in enumerate(x_values): + y_predictions[i] = invoke_tflm_interpreter(input_shape, + tflm_interpreter, + x_value, + input_index=0, + output_index=0) + return y_predictions + + +# Invoke the tflite interpreter with x_values in the range of [0, 2*PI] and +# returns the prediction of the interpreter. +def get_tflite_prediction(model_path, x_values): + # TFLite interpreter + tflite_interpreter = tf.lite.Interpreter( + model_path=model_path, + experimental_op_resolver_type=tf.lite.experimental.OpResolverType. 
+ BUILTIN_REF, + ) + tflite_interpreter.allocate_tensors() + + input_details = tflite_interpreter.get_input_details()[0] + output_details = tflite_interpreter.get_output_details()[0] + input_shape = np.array(input_details.get('shape')) + + y_predictions = np.empty(x_values.size, dtype=np.float32) + + for i, x_value in enumerate(x_values): + y_predictions[i] = invoke_tflite_interpreter( + input_shape, + tflite_interpreter, + x_value, + input_details['index'], + output_details['index'], + ) + return y_predictions + + +def main(_): + model_path = os.path.join(_PREFIX_PATH, 'models/hello_world_float.tflite') + + x_values = generate_random_input() + + # Calculate the corresponding sine values + y_true_values = np.sin(x_values).astype(np.float32) + + if _USE_TFLITE_INTERPRETER.value: + y_predictions = get_tflite_prediction(model_path, x_values) + plt.plot(x_values, y_predictions, 'b.', label='TFLite Prediction') + else: + y_predictions = get_tflm_prediction(model_path, x_values) + plt.plot(x_values, y_predictions, 'b.', label='TFLM Prediction') + + plt.plot(x_values, y_true_values, 'r.', label='Actual values') + plt.legend() + plt.show() + + +if __name__ == '__main__': + app.run(main) diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/evaluate_test.cc b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/evaluate_test.cc new file mode 100644 index 000000000..784ea6f61 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/evaluate_test.cc @@ -0,0 +1,142 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include + +#include "tensorflow/lite/micro/all_ops_resolver.h" +#include "tensorflow/lite/micro/examples/hello_world/models/hello_world_float_model_data.h" +#include "tensorflow/lite/micro/micro_interpreter.h" +#include "tensorflow/lite/micro/micro_log.h" +#include "tensorflow/lite/schema/schema_generated.h" + +int LoadFloatModelAndPerformInference() { + // Define the input and the expected output + float x = 0.0f; + float y_true = sin(x); + + // Map the model into a usable data structure. This doesn't involve any + // copying or parsing, it's a very lightweight operation. 
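+  // g_hello_world_float_model_data is the C array generated from +  // models/hello_world_float.tflite by the generate_cc_arrays rules in +  // models/BUILD; the Makefile build generates the same array.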
+ const tflite::Model* model = + ::tflite::GetModel(g_hello_world_float_model_data); + if (model->version() != TFLITE_SCHEMA_VERSION) { + MicroPrintf( + "Model provided is schema version %d not equal " + "to supported version %d.\n", + model->version(), TFLITE_SCHEMA_VERSION); + } + + // This pulls in all the operation implementations we need + tflite::AllOpsResolver resolver; + + constexpr int kTensorArenaSize = 2056; + uint8_t tensor_arena[kTensorArenaSize]; + + // Build an interpreter to run the model with + tflite::MicroInterpreter interpreter(model, resolver, tensor_arena, + kTensorArenaSize); + + // Allocate memory from the tensor_arena for the model's tensors + if (interpreter.AllocateTensors() != kTfLiteOk) { + MicroPrintf("Allocate tensor failed."); + return kTfLiteError; + } + + // Obtain a pointer to the model's input tensor + TfLiteTensor* input = interpreter.input(0); + + // Make sure the input has the properties we expect + if (input == nullptr) { + MicroPrintf("Input tensor in null."); + return kTfLiteError; + } + + // Place the quantized input in the model's input tensor + input->data.f[0] = x; + + // Run the model and check that it succeeds + TfLiteStatus invoke_status = interpreter.Invoke(); + if (invoke_status != kTfLiteOk) { + MicroPrintf("Interpreter invocation failed."); + return kTfLiteError; + } + + // Obtain a pointer to the output tensor. + TfLiteTensor* output = interpreter.output(0); + + // Obtain the quantized output from model's output tensor + float y_pred = output->data.f[0]; + + // Check if the output is within a small range of the expected output + float epsilon = 0.05f; + if (abs(y_true - y_pred) > epsilon) { + MicroPrintf( + "Difference between predicted and actual y value " + "is significant."); + return kTfLiteError; + } + + // Run inference on several more values and confirm the expected outputs + x = 1.f; + y_true = sin(x); + input->data.f[0] = x; + interpreter.Invoke(); + y_pred = output->data.f[0]; + if (abs(y_true - y_pred) > epsilon) { + MicroPrintf( + "Difference between predicted and actual y value " + "is significant."); + return kTfLiteError; + } + + x = 3.f; + y_true = sin(x); + input->data.f[0] = x; + interpreter.Invoke(); + y_pred = output->data.f[0]; + if (abs(y_true - y_pred) > epsilon) { + MicroPrintf( + "Difference between predicted and actual y value " + "is significant."); + return kTfLiteError; + } + + x = 5.f; + y_true = sin(x); + input->data.f[0] = x; + interpreter.Invoke(); + y_pred = output->data.f[0]; + if (abs(y_true - y_pred) > epsilon) { + MicroPrintf( + "Difference between predicted and actual y value " + "is significant."); + return kTfLiteError; + } + + return kTfLiteOk; +} + +int main(int argc, char* argv[]) { + int status = LoadFloatModelAndPerformInference(); + // To be part of the unit test suite, each test file needs to print out + // either one of the following strings. These strings are required to + // be considered as a unit test for the tflm makefiles. 
+ if (status == kTfLiteOk) { + MicroPrintf("~~~ALL TESTS PASSED~~~\n"); + return kTfLiteOk; + } else { + MicroPrintf("~~~SOME TESTS FAILED~~~\n"); + return kTfLiteError; + } +} diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/evaluate_test.py b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/evaluate_test.py new file mode 100644 index 000000000..224ac725e --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/evaluate_test.py @@ -0,0 +1,103 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import numpy as np + +from tensorflow.python.framework import test_util +from tensorflow.python.platform import resource_loader +from tensorflow.python.platform import test +from tflite_micro.tensorflow.lite.micro.python.interpreter.src import tflm_runtime +from tflite_micro.tensorflow.lite.micro.examples.hello_world import evaluate + +PREFIX_PATH = resource_loader.get_path_to_datafile('') + + +class HelloWorldQuantModelTest(test_util.TensorFlowTestCase): + model_path = os.path.join(PREFIX_PATH, 'models/hello_world_float.tflite') + input_shape = (1, 1) + output_shape = (1, 1) + # Create the tflm interpreter + tflm_interpreter = tflm_runtime.Interpreter.from_file(model_path) + + # Get the metadata like scales and zero_points from the interpreter input/output + # details. 
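+  # Note: hello_world_float.tflite is a float model, so the scales and +  # zero_points arrays are expected to be empty (the tests below check that +  # their length is 0).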
+ def get_quantization_params(self, interpreter_io_details): + quantize_params = interpreter_io_details.get('quantization_parameters') + scale = quantize_params.get('scales') + zero_point = quantize_params.get('zero_points') + return scale, zero_point + + def test_input(self): + input_details = self.tflm_interpreter.get_input_details(0) + input_scale, input_zero_point = self.get_quantization_params(input_details) + + self.assertAllEqual(input_details['shape'], self.input_shape) + self.assertEqual(input_details['dtype'], np.float32) + self.assertEqual(len(input_scale), 0) + self.assertEqual( + input_details['quantization_parameters']['quantized_dimension'], 0) + self.assertEqual(input_scale.dtype, np.float32) + self.assertEqual(input_zero_point.dtype, np.int32) + + def test_output(self): + output_details = self.tflm_interpreter.get_output_details(0) + output_scale, output_zero_point = self.get_quantization_params( + output_details) + self.assertAllEqual(output_details['shape'], self.output_shape) + self.assertEqual(output_details['dtype'], np.float32) + self.assertEqual(len(output_scale), 0) + self.assertEqual( + output_details['quantization_parameters']['quantized_dimension'], 0) + self.assertEqual(output_scale.dtype, np.float32) + self.assertEqual(output_zero_point.dtype, np.int32) + + def test_interpreter_prediction(self): + x_value = np.float32(0.0) + # Calculate the corresponding sine values + y_true = np.sin(x_value).astype(np.float32) + + input_shape = np.array( + self.tflm_interpreter.get_input_details(0).get('shape')) + + y_pred = evaluate.invoke_tflm_interpreter( + input_shape, + self.tflm_interpreter, + x_value, + input_index=0, + output_index=0, + ) + + epsilon = 0.05 + self.assertNear( + y_true, + y_pred, + epsilon, + 'hello_world model prediction is not close enough to numpy.sin value', + ) + + def test_compare_with_tflite(self): + x_values = evaluate.generate_random_input() + + tflm_y_predictions = evaluate.get_tflm_prediction(self.model_path, + x_values) + + tflite_y_predictions = evaluate.get_tflite_prediction( + self.model_path, x_values) + + self.assertAllEqual(tflm_y_predictions, tflite_y_predictions) + + +if __name__ == '__main__': + test.main() diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/images/hello_world_tflite.png b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/images/hello_world_tflite.png new file mode 100644 index 0000000000000000000000000000000000000000..56b222169864b2bbec16a3882dd63c96132b9d2c GIT binary patch literal 27649 zcma%jbyQVd^esqzC?TLoqog36msV+{K>=wH5b5p`5D)}Gq|+co>29POMY{7M-5qb; z@0WkPG2U|wbbyz8&e?nIwdR_0t{bMREQ^1g;yM}{8os=olsX#PH3S+Odg?#e@Do}^ zTmkq;$XQz3S;NlU+0EF|3{A<{+1}dD+1k?NuB(}&lck+4FNXjJFWcRh&d&Bu!knBo z|NR0EJ4Xx7{)74hcoSTEIUOf7G(uz47kZ9Fwj~;xm94zgBTe^|jVX_mUbCzAosq=o zM@dDL!hurdY^^s}vyP?X)ZI!IbtqWXx}MQhcdM6wV(8Xp{dj+h{Hv~Mt~GCYSIo^s zddV1a&A7z1qxR0+1@Ha@<31jX>HhYGGo$=9;h8<5_JqAet1@y^JUaMukAZCfJrw>d z@-rgfdt%4fS@=$_L+v#^>c3@iBjMX1s{iYMW~*jk{1f#3JDcz2iA8?p{B>&q-Hc`; zBBE$2t~UGgV+Yn}-{rKm4}JKsc$!X5km+GzVYtp@@V<^j>KYnF6B}dYEbnpqZfohG zBW5x>Iy%r~QCAg;j*fn$s2H*4Bq{>GDI~~to18osQ&vifA-%J6pIC{96t409GwLT& zk0Xhx;F7pPSdIj6ua(_BJ&Qewya5D~*DJTT?XTZvjb+xYyx&Ab#4@X#f(U4yYa}CC zQI)#?>^s?QR?Q6ixu5aQw5aF9(l$3|A_xgIX??3!ZmnbE%ZhFqF}MGB@O-zMbA75N zesw6%(4Mzp{oglh>`M^}3PWhzY-l|&h zvWzJT4b4Hyyl(rtKVCV-tg;DDNJlL7KkFPLNr3ZRd+uCA>hZ*0zl2I4HvTp0Hpa0E zJ6eI{ckH_5XoZeT3f2OY?UB?qD_+a3V&~GhksVk)i5T46eQR;LDSiZ$q$@d6*Ey1V z<2DY?_xfwZC;J;{+-K1n_nNP~F>nYNSXep_T#h!U{{G1wWNi2K)%*G+etB5e&TMbF 
zH{N^ehUM^gavVY$23A&9*$v%t>&_1<;vVux|1hL8Aq;&_7^y{EzrezbS__C@9xvPw zH(%=dsONWa06U=i<2nCwPvXKa^R8GH&0O_hI_b9qU*t(*K77~{cW2qABBr2fmqQrl1HS`D8d3KFm%7u{JSiEO>W}zTh=HNQaL#)Byks6Tg~+te?mP3b z9Qxy0oK3e!km-JXVI<6z8|4$Vhi?~$up@$R++h+HK8<0DMZ$u#X5;pCeqhcuZ19b2 z$UNMfdUAHSQFXGMJXUOWLq+_ALCouL5f<02EK$Nw{Nn7$q9v5bdb~93^k8i;ONo+z zR(w4#{$6Isod2)SbLA7WhZA$tUiZXlWTKWQD6W>@|6tSMzD>5fXDZqxCBE=XW`|E} z_efkCvrGi7 zM4Ds>L6Pr;rxs!V`N^K;uTNMITE^409%wx&LLZ)&y;A!kNAzm=dnct`?VM}>;^K?^ znoZ%iw^$y3mU$=Y8T|cC{IdcBrS3R(76^LACvjhtk_FBWC+wWJrc$0g%`jp$ zU168LBqn~oExGz9S5is}^XWHDiH!(ez3ZinYL5ee_k~V}iJkXXGU{ zN7xQYBir-R`x63zID`T&tA89G)F9Km6q5yP^b7@UCr9D&*bn`h-}7kwh2wsmoCkmz67CU$Q7jNS7}SnMEgcHmyo5`UTX~)M%ExjyC!I$7(%FmKHcP zB}`iW(P;4XF^%AEkD?XSuXg6PnJDLnFy}*kWhQ9cba;osh1>{tzoJpZ+o@LuUK{*--JA^xrtOi5SxU)C26diQ276;A7Gj%4#?9Cyq@?Bs?sHfr zPDo{5uS08%(wAzee@E8x^s0(ko_>o>J~qy`v=Oc@df?&c+ro{6n|Ib>n%#k(kTsEm$8% zpH#@j6#-AS6%>O4FpQCX;q}2AwGD>-f(*ZjDBt7u7!C$2t3~k~gs*CbmKmBf(w+3@ z>%afFWDu8#TjLfe)_BF3#Ba4+UeKjw=+o*lq932QFKyNC z5$NdXKrj#Tzd${ZLhSvfu8$9@4u*9(-M3&hcILx5)oa}yCbp%X zy%K~yW>QR;WvyBLmN#1OBLb)Svu=fL?b&9{3p2Cm_wOG@(@E!OWD`JUpRV)r^4Tmh zZU3F4MqiPZA?&hRd1+8-Z~XT5gXP1denE$MDL788PW@syjr=2e%@7Ubg0@ssHEwRO za{1IzVuxgCDiRlb1+}}^A3Xb>A@h#nj&~HtGgT!XL)C8AuZ_nNqFos zcpa|a6BnO85QQ{lvDguvj~pv9#W`3Va+)})upG=9Yb!MCCWR$&;pUt&gZjf~H$C~| zyT{)0`gmD*PmiLOPMzlgyqDmYm9<>PM?--N4b+^YbMXW;ZVye+YL}!KrYao142sRP{k3 zYPk{$@nn7ZnDvX)nT`1N*Q`xpk&(*-nUc01@mQ5MxpigoX!lHtct9;BPi-juoNfkJPs%K9E>07LR&7lHatWVbIb(zmTa!i7eDC zdzE=)V+b)+L4)hRR?v`kvWL{j*At?nr+0E`NJt>duRmR-Dq}aS7rp)9*apv0I24=T;7^UC!aX+DxhWrktG3WCBA8XxvDk)o z7CJ}0X-XmHk@Ty!U;m35Db6oz+i^&X0(RZy zf<<#=U#hL7HmrJ)UYtJU|N4tVUDwUY&S-jBgwryfgM&jXi^dy~&C0-n`qQ^{UPr}% zQPSM!aj86NpIu5^oe-L~MHufccAi7N^gfz$>q!$!h6;Yo{_5h$v}9~$DDTchg&lHG zMZ)a-cs-;i1dsf&g2Hvw37hdfV~5Norjf0@)DcaeKjLj`HUbxB^U}k{BYUW>PRv<< zOeb-^jp2msTRIpqbkqGt%XKH^jW=#2t%R+B-Ly683yl|!Gb+#S!9~bTBRrN9EqVe8 z#aCWkEksEq0h&pC6o{P&$R>eBBP#*GVu|x#b**Gb+%o`c5`>(V6E-F)l0#^HM^WMS ziS7#C$klip6AVSFC zhwj@Mf5*%6;I1OVLPNQrMDxQj+dVyTWXY>p?oGC!;CpEV`);ie$7XqXP~evsC4SoR zv;k5#s@`cpG!AAw!q}S&p*630+eK#jEMLzO(y%!MxndYG3SmOEu9#14U(trB{|~Cva%@l_6L{cqOaR_cW}CYe1J?ZE1?sL zogtyZ74!HwI~u{==6508bPqCYPR{+`gM&_DwyfyO_6Wh{n#S^FY=n#h!OdnoPoBg} zgNbJa1bfSmtk+wEOWobCAKvl$PXN>SfB|+Z|g~QIi!;QWmo0WlD+{aQM{0SC0BvfS2i$sfLYw7g|7dZ zIXgE=FPuoFWS#LY*;}(3dxS&k8rFnLBl6ApJbx8; zBWLWQXT8f>ndKNUtJa&EEmwj&SmV1(s$Z|4?|*O_?NX-D&6T#PKT50op@Xs16%f_` z61qK?)uw2FC&cXE?9}eE>~_5Db*z}=0!w9&$m*D6N?Ut^8$Yz?sFrZwEH0<`oXw`! 
z6d86C5U)i0sPu6*AZGB0h^k&H*fSWZtEYtcw6`>l7TLFNIz-I2T7O6;U|Tly#z)X5 zr{GzQU>^-{*SOX6-`i|u!{&~&R&gJzqh!_Gc6muRPY>-GAi>e>el^ID20jN*)5T?d zvoK}P`{wKqtNCp4!ovEB-Ks(y@Y*JRS!0v6AjH37j2Nt~7o9AU%=`%Lvf4v7vsav7 z9AILrC2A6=PM-z-{#{zFuy}1aU;l}_I|A}g|AFbpyFmhshiFK-dnO2@#K{-e^1g|7#N_6RCJNs-JQ#KV5Q_;9 zcr-#8YDO?stm0_kg}*v-1kWB1PpJH6Z;)V$Svke%^893fb1E6o5^uXnUNC?O5=zP^ zfV=K%WZ}6@doZIx7rU69?mM5Go%Ykb#JEJr@4DF4`5xcPfyNCQs35G+Y&iI&#>D)* zx|q3A%TgqV2I?hrI?zaTIZO^_t7JpIrpb1iacHL&-5-!dTOTV4L6x7~rEXO2G%+UmZX*L5=3qmK`?s=EMSciB`-$?VLxvOp~+6hF>b+qd_JHd4L9 zR{t$2yTae!UoEGq`I%MHdUmXCFe%u#*rj>2U98D`x!G^s4W)DrfTPKluCx1}G)^z; z+C>L*NP?tkjF61OR#;!f%;V0O%YRfB=NT9f@!Zpn;d}i-nCTKrIJDjmdz1Kc z)jnerif*HonD@@7w?E*}DGn0Zs{P=)FDbM#=QibHJz5w5Y=Gm+z`g2o z{#(A=vyV1ww!~>XSMIs4kNyk9*YayRDrBE6AFyPc1SpdnHT=;$k}NZv)%lRv`*$Ym zN&Lf*b*J3iHq9Tam%9xa(udj6DhW4N3K?4#J1ytoKZ1g*{0iQLh;;kp=#-c*4`~_l zw~;w5_mFGAR+pHxUf1#g;I5>nmon0>m9IBeyW91^){I^zg6m*y#O;CC(nl>tU>xeE znR$5!^&DDno5Mcl^Pqaqjx|$M&%8GvNmx~%CSa3<>NGohdiGBf0N*E!7|tQ!)evf`;<6m|0ohc67)AI~AQb z{jstVfodk9`NV3h_1LRCn)brI;dJ>z)L|YIn4B-qemn;>@_N2Cyae|1$!Ze{*lQP= zV40d0nzrHjb}uKS7-eJ1diTxw?<}PH`F&(wX^gRU zyI((GIhP4-_3UK-EXL^HR>dv(zIva&g9EyzFP<*{h-@t|h*Eufx;Li5#K2&GxUPeu z_28NAYv$ZUL9}K<2?jASYJULK%cu3m#>O;~JQjT^4rkTShgpvm2g`q8Dl+e-fFt=B z&X|zTsdHp{HrzXzsdn)TT%bR^4~Dez=UDs$FmQ@r{-Q&14XADr)+$H0B7X#lh$p+? zJ=|Lmha~{G_5zp;+mU?zn&Xj%OKHfG@clwbzxZc3NVSLKR)2ET0%60Kk6o}xAZHWHoxbi|)2o! z?xf%EvKc@ji(VyHI4OJAkkt)R$X(e`+K`X*!vc%&`oya|&K=13It(+hQlshT#w=3( zW*)KylCAM~a(|Y=8j0(r+u7-(X}kR(#jmaXY(1BlgydwSVkR^y3OR07;8JsWvE^6u zCRZkctjTMuu05299`XmyjL#7@XTy2%nmA$V5y}BiVpf0_BUKz^xMP~akrs*uVtX9_!t#pxunBM`5`;-0UP?I zv#W~-x-UVS@ltLSR|N$6a*|cPL-L<>P7nUhQFBFK{VSIp9v+_1w{Pjh#8QEqe;7hi zOhtgq^0l&*A`$T1s!J53tadvxebLlEXP;*5a$4#nbRkTkySZzPru*tu0M+n^+JA|N z|3ues&z;(8(3au-<5<~+ z&1L+0M&KCUnfM;pL&(0AOQ{zd*%FoBewO_)GA3p0(TkRwwIteGH z>U26-ZDQADc4VKZ6{HYU-ji`Oo_$!k<{(m+@F*+(s@%BoTe$*-wA4e~$i*V&G=Jk6 zm$|3d?zBl1BEu(P>{UD-JTE4x`|-c;RePUj*=Ue!C`mr0qXT%^kDyXLEX#B@4;aDV zOBAblxaH!MOUmX?6mBY?5n>iYf{iyl_+hQvGDpEN8;v99VBZG>)3} z;wRNAnYI7OU}g-&%Ex(?RkG=?7N|Zp@D@dkubci=igK8!&!st7LOedNrfF&E$+9y{ z{lzj3T1Y5BtR09_d+7eE`uK=<1WoZ8t}aDF2x+sV!ie)1&rB>oOW5qkGZSPX?vGRw z&rO?$L{MgX2-eTq8=07}L8T4o`r?Qt@{Zzfg!0ZNkiVdt2u^%PlREzj?;s^BYg*!Q z`J%fQS>b8Vpt7D)m~fr6^BuZ%lIFUIt4KMG7*~mf7Xv(^_vW(9aFH>FV#+@kXE=0n zxUP_AvmohGWX1TyLw0jiA(yERW^-_mF+Ymjvi=4H)6fokYUWO0967#2t$X6Ge*>Im_}ZCh`OW>{JErJc-Zpv7=-0NIAgN7V{Dh zJ%HVAWq+sZYGi#T%55%o)E7k`*ixA%PNuZBCE4=EP9BFh2mDZ@k20Yg2w>#C(0qR?CPA_J$pKDPoON+G&$c_y_qG#7 z4(jR@D=$aNtjBmz>}K>VWTVQm z2Rg-H3Jl+-Aph7@c9?F&_;*ocYbN&WAK!bO0lAEp~`x{m|x3LwonYHF;!#PWFGeU#PGp2vB z+`AW0?Xvb87>bHrsBrNB5>rt>$kkwmS|~3sZw%-&?g)tTfBydcoBx2He~9o`NWKJ3 z=#enr0lQ1izS1Ydh3LK7Lkc~ocVR7M5E&2NK=i*kPumG6K#@iD5)gFdQTl~O=&%>_ z0XQ_3vwH|C0^f@REmqAO`%dmBhXjhc>t#pFQ`a%bMRmrMRH1pVk<~3krzgLT$X8~n17n{&B2Vj#_dXHP)%J-R{?VipVZ-_Ejph(SW_I>>*!UEM(B6!+EP#mTMYUND zT1frnsnz9?M1SUcI8i8~a=9jc=77IrgFN(o*TgLa z18Yo%yhxl4Jh1wbo3Z<5tYvtEY-@X?=`RVNNVSYOYX&YaEOvC((9G^i@3F15=^dd< z3Rv>U4y;a&Ii6Iai_hB5|GZOZN1G(VNcV_yCGypKZ`(=4TMv0#9^B7M@7IwU^8V0- zJ2=p^3S`{4QN_UDhnpA4(J-T~PCEXnn8CskLPIhU_X12bn`hq9VyhU+n z$>7E?+PFhoHTa2SrFJFY9xQt)wu{AnCyTzdnZ1jzwB)4aI8Z!^tyiX`kD!aWVre2G zY_KU6FXba4Go}0%)|#2|T--2Sm}{lDLtTP6$NJ%ObNO6Jf`Gv>H#l z$B#k?Mr)~*y)ZhdC`6T?v`I4AR zFUw8V3@+&hmt+pX@I2gqUKcB%xk8o)uOg-l-`K2S|B%9E)E8$ZFTdlaN=N4bS4Oax z)5KjvdT7)obUO8)aMSEWVckHe&pq${rvAlg@i63^>V2r(H0+GBUA?7FO7Odo#J)0{ z^_I3%A*P+EU#7JLPs@~&dz)_IKMj++1Vh7JMy$P!3Yu2zl@8_}+;EF!#j6m;OSN(@ zoXFVPajTe;jZ3vl0(q&$7`V}uu|Y?Th8HF}6vK~Xd2XSZMQt(#K44q3oxaHObdX}0 zQG2a1fKQ2S;AwZt6YP625RHe`!uk1o&EKB^TW5;9c3gCH@p8DzkCyEl@2|~;yf7Ji 
z$aXY^L?$p8eRHvqi8C{#DU~BHFrXsp)QX+G^}^tpa_U-L*gI*G@EgQ6sfCHWwH%aJ zbGNDI?$Vuv5U0Gh{;S?S&;G{(qewAv9_h}oJ-?tsVpW%Hy76`4f+_+CK#kWLs!#K< zK~04y4M}`dYB3hBv0NcGkQq3$0PGx@g8lVRLQY2gG*KoPJ(-l0p-Wen z{ab4C?{fyl;`F26iFVoe>l9;9@`hcx?oTZ~rzeiq4~I4=vWTN+voW(Xat%^@_AbZp3;k@DGkeUi5?jB()Ln&$-x|$;6~}!AEAV!6D$~LA6H}d>)-sVYsVTXf*&%OAG@6tw zj(`-d_()4@%Q$oa)6OSh3l>0=Dqx&QBD5x1UZ`TQ0?Fd!7H#ULNSB?<_Kg1B~nvC5pcL z88iE?wuxBmd$Su0huPnvQ`RTXR9Y0Q*}ks|gxu3!Pk#|H;`32T>RKl!ZXakus1_CM z37OFJGcYrI*@%E3ZY_{J@f2`Yg+LqG8$w#;;On>VLtpnpx57L=eyJbJK;i9(%Z#qw zhRRi#>%n9PR&OFkDvnmkcizFQygRZ|uOs_OQb1Q5Z1D3#^`Ou?o@+Ls zB_Sv8YzoBD9|380@tK6{x;7J#c*%D4onY^%f?gD)Onkq~lk5UR<;%gcJTnKT0S6rk z9}fA*3%#%8I;2YiAHJg2Tv%Mio=G`H4m#D9U!QS6Yh3OXocRLmN>37h_nIk~aDM&z zm7fRLBwyGR4@F#p^akZ^GXu2xTF|S{!F}U>zS9oH=A^sOc>Cfg?&;Re_Di;!3Z-|_ zqUUPkeMW81l@S7V^|%XdCt^%QM0Vauvnk>}N$)5HL7QJ|Nay+=AwEh3C=cK-nV6XV z725zQR;{A!Ezh4PO^AI4j`a7Juo15`VVAcN5qK337KNw`{POy)z1bx;Htv&bm3I83 z!<*1}cbcj1b00RBd*PdJQVcKu2Wr`haiO8~YS5>5x%prHF@iSy{HWn-Db;QI-h&6> z=DkUC$7?vqs`jPjG*I;Jv9fl#w6?V9tv z5aFQj%|sP0XTNRv8r5e1Y;G2EUU{#30vuXs z-&}cByuK?rwBFP;S4nJ+n+{b~7T{k2=Yp-4+}f|^T}h@JE3JwIM$8<_b%Kr#wzVT? zdQ=fRp3mz-LnTi5no!gUKBWN4B=t1z<40*w7y-EV{u1NiZTIxw>oz#=QfU%zZWh2W2H& z87pC))iXX={Gh_)xF{zM(n0;%rbxBxh5``2;OF`PQN3MrX4`FoQQKaJ=DeOWOD{bER15(a$AR*k0_#2 zAT%Q9kBFQJok8~E`l=1HN$XES)pfxv2)kCSrKRq@K-4+mjI2L9r}_|7>vz;`?zZ5vmeQ@1Thu zXV99)T$^jbMZW!}vNEe?fuUVDj>@3^#3=*YO*nC_eVMto)_3FH-Y#=g*fls1keyN7h@87xhm1?uWXn|e|6B|$s?3?lqfBdIZN#^2H&%9GS4(_GAO2@Jk* zLHR!P7RYS74>(KSF#qazF{vcby5mUKE-8>Gwh(4sbswzF0 zwi4Uk_H%)cUCRtuEdJ&Ycwdbi%}I5tXSfdbEbRfww36VZT$WBK5$`kN8;j{{JFpJoGoCeh@PskPoza>IGKcbHWR?y z$Ix4RaHk}2r|EKWdv2zb?}Oh_YPVxzUjq{)+5$<_nAdR!{Wcq?4xmb)?}qiLh;~?D zyE;3fX{3$Gz7-OpB`ZD1WQwMP5sNB3{T-`eZBmPCYc@*cg33;UjFot_o9BDn)1|gt zc4=>NrCKcDMNWerL4x8%y!J*J^h6GKC~u7D<5-pGs<}+qT;j<~UB3x8)%sM+gAKp` zXWRWZMP6?HCd5_#D&h`(tz$AaYuLIK!co)!n@r(>1A{PoJf1Z76DxC?+oOr}Cgbo4 z6BVqxf=$K&Z&!RwyIn9eln|tO1Ha9nZ%~LpypvN=VFvziVD3f#oWH`yS=uYb#L>!t z0|d8a4M*(zEE^^1g2;;ZL-Xs*P6jiy$D@0{Ox9~vae zh-mu$^no?025KmGN_BZ(R@b}em>D{T83{A5JdWWvJ60gdcNz$HH>{3CI0q*Ipi zES6&f=`O|bD@CZ#otxpe8fq>2;hN9VeP8LMV156ZyF4vd|0ffpG@@jWDN0)~fv2?r zJxs1c{WZxKo=djH9o>9%raeS&%_#qo%WEQ9-l!OayJ`uf4q2(ac>PUeK zD6a@`rGyh|hFZ0zPbMYui0tE5LXUB^TZm>K>wB zbgu`CHa#&>9vo{P7Y8HYgLyE!-XsJ{E*SrIq+4(O5g`|}i_|G;Gp!gY3DOAmVN)o7 z&=l7v87)mIAhuav+>u`Ti{A0zRvEoGL%= z7>u4^^1%Z?tK$Phu;K#81B^TcO3MbJb!bftsC#J{nKu)+V~I(c=&CvyB4iWV%4}jM z;0OY~xY5Okwo z?DDTB%GvGe4oHDess&gIE}6Fzwl(j16uZULfnmV|4{QDdrnYXvp@)>>lf~}1Hi;yp z{XL1kk+Llz`E{hHyKk0a97KwxfCb~dP^Wqae~EE z1mfWaU=l+2$a-K8k#-;T9aLb)WTBE&d%jg)Dpg;C_(1xZ00HlENbO8dCz_*fcbQ^ z&a1-A=fAMm0J+d_M~>I%<@`R{HKb2E!rlS!pB0FEo~~jRA#h0&9li|I7#Q&RtViDi zqo<|w>=|ll_7Fh3YZ6ZMb8TjKNIaFL#t*YP1eg* z+;?UOWkFIp50im|sPoD@ZFUk2r6?aaP{1gG287U`zrQ}wCy6@Rn(0S1E*n76b^RU8 zt^(I;u4?A%HHxc|pyvN%vuqnKT*3ZFF~6{2Ia+wn(C6Uxf3R8N4*U9B?L1U@W13J> zzWgxUT5tW^ir05}K;MaGOxERMjfaaHfUhdG%b30uND}e-6)jZ=1_{Z+uRv4(XwfGV zi)x@XEQ+1pS4_r!Cuc17PRwslL1~E1iwHN($DRSl^RSdEZTt~_9G{vaJyvs}u~A9W zfpy_!W^BUB)_a)7(0ENEM}VV~?q{oWK=M?(a-e@F6f<;cbF%VSf#A7dg6J43)758V zGW+}LlyvXXHli6mtJD(PRnKT?Z|)@#er|VOVTuE2|0qF<;R-V1!@u2vp66VGjpD4P z)vkmOrRW06*0tGK+->oVF_1})Hs{QALQvjSsR+I)Q!WZzwV`eOO8mC|*>2_`785bz zHtR!L&UQhx?UBqnI=VZbFqt1l&bU^zC);6={z^>bEi;n3o)t-Lj8SwR5w9dtVZCD| z`Y-K+8)V#kzuu+J>?yM5R}r4N(b&fJdI?w1uejz4FNzt;&_XE&TJ_fpc5rO|-0%K6 zr_;zkRZDApPyJ>D1(xB{zkRm(u8cz1h=k+G!g(*=dxW#I@y;oIZ}9osI>3 z*fj2UxdLG2Gcntvh>2%E_U9L+h*ll-1w0pG+$GG@Y91IH>MZ@HSEc~xMXo${_BgB= zea(|`8`sSUqoVVJ(_fj9PGM(-MQ?uH2_3*{3aEu2Q+>U_d^Un`AKZ6zshIFrw01V) zPUJ=A9Lco&%-82i3z!zX)U6p#p9_@}|Ag6Y=y||!UQ0VTN5-Vcp6_EPRhwflRkbto 
zJV|>v-F*vYHRPmrZ@`0Z54GG%GIvj-NRuYx7ty={+kVnq_3p|bv%$=#NP3S?k&!MA zZnCKlH=JEd+2+S@0}Gx|y~Z3h`&wn*Vz$dW!RU%fi>M$vW@*|f+F8hK>JJt3_^X~^V>I=*{P{Bp-H|^XD!MG&ji+E#wnA3f2RcrzUm=`5`lbWO^9!@VB zM^$s-`H2qjqN#<+acR6>kW_JT-DVp`DhnmK{@RX}Blx$~jhGkWVZW5Ut#cssRyc8r=%W3@0t3~1-A!A1%GdFyl<_P0t+1&} zkKT2B1xmkIA+F+k`1!I`Zz~amw@0pp%8D`Zaah;)SyGDPMBu0clzj@>4e;``aD=6z z-|yHo)4&tiG=}*~LimSbk|%Swty)OQb$4{!1&<8y1fYuSEvB)s*jt$5NT?;+7wauq z;3DG}eKbRdvV7@0ELSyG_O*prx5H&q(XDfH#}zdgS{9f0bj1)JFLq4aU(4He;y=8{ zlWXtj=KqNpSD#wsr`M#Njs=D?9{JLYzZ65xF=G={vg=@J$drqwcb5M~2!25!ls{jKEuqpPBhTbB`UKSM@#<7EO1{KYj zy|y7!@8;P;a?v>akV>#1ER9eYdeSA*!J)tlLkuvGAqA`gim3vm1cKHP59E7l;A@lXoBE>Y(L@))QtPW0~!l7oEF3&Bl#^d!&Ps}fAMl_}t z>BN68ywe7uqQJ?)S4nRTm9@3WXssqIi=f;@t*wEFU!%xGn%BICtosl(A%l`WA6Nma z>4(#@{c7|-26BYSzu zDKs>sW=nu6EH3Wq>QJzrcVI5;mLhn-*bFz%c?p-6_a?H%5HNUSzR>or)OKq8DJdzb z3}7>zn&DHyJYX2A%WzOtHtzyF$v{pxS&`hnE&YK>sdjphQz5=jzbbSw4C{uG-`8Ze zwe`+$NDlLMNP(c5B~oNME-pH5GYo@)UA&{G=Zm+;(FJQ)Jf{lqWlN?|+>34eHv1 zIy&1O^ynwjpC5kzMDgEPjfrV1y~Q?!ocHc}K?NUM&q>+xOz5#*v#hC({EhDF?wQ0a zfV_T*e4RoM&s7o=3n#Af!hDiuQSN!D?*!Dsi_d&P7KA|D&stT+vHS5~W|$v3vGA_7OOutFY-#&RDOl??&4jOP3^}cZlZqe#@vB6X zjqUEg7r4ctm+%IkG6p5@82`H7?|L{K&S(QN=N(! zTp`2fuHDsKMn@CRc{t`ZhVrh&lonF9?N>%P@6H{IHUWO~A_G7Q0%11LSSZ68xEZUN zaJOa}(!oGkJuYZ99N{+O6AuxOvd5Eh=ykvtMo$?yabNMeZyTYUu{6HN#^50p0_^uB zp5rT={C}_ClKiVE&`?U9xY? z<6a}Nx4Ap^%Ch27q$eb&p%{>dfU`rzS1t*{48Aodf}5-u(LK+aE=no{6s z@QrCR^aIWRGnl3|xZ?{&-`6?x=ZyI<4*OW8PWC%;J_w!MCq(;CXos}N99H+dqLJYL zeo3&lNavEU_V81cv}`CH9!+!pj8E9|mUqx_?EBnq46r)DG%RsazPm@|(hj~cnXe9| z|D|KJs%e=O_j@eSZnC=>c`YAjL*-t611YArHOYfP2>aN4Ru^Or24~r zFh+3+Lo5Sy|Krw1)VzG6mgWU!<1U=w2^Q5AlV0eYGez+PJVL8IGB8qM1BW`@y|uSq z;~vymr|SZ1v;%aG=ma=0-33~aj_Bg)c=umH@F#E?Dws*2CM zhfJbv0qjCVxE{f;o>j2fIao(8&xbA!3(jbhsw@x@7+I{3HXcob3QEL+#wtG6+O0>W zeg0jU{7xq~&_H~vqB^4h`f1$Ju<@@y)FTC}l3d4wi8_oBx`Lwb-RE@jXP04%e;O{$ zbj}GJfKS>O&i;ZKgnGN1J$K~e+6DJbm~yQ?&V}l!n$WFJS?QF8?{3$uCxy_*q~g#N zvh`gP-oX`|q0*}p{P)F4$L`i=hfQy9*Gibv;$v6L1?xT=_#P7&6!Z{>nK%d-z#-P| zZ-Q6PDW)45(oMV2QMbH3x`Fss#$dUPT<8C&)5MPYd*s0*F(EqS+3JJtc{U@y=KC7h z!p_@vazdG?d-~EFPOX@IXU^}(kL%3gg1ybw@|thx=fROCdKk^qct33k&DFf0KuYxv zcqNbTol~N~&7JQA`KUD7S zEr}O(e4;)d;v&M)`aWt9N<^@Ck9Au%SD8A7XO4&H8 zoPI3Ng`%yGW9e*8n7%Pa$UC5omj#ZFKE#aUMXmb7>fPwbIHZP)QJAX$DY`GM(6r-b zuRKS#kC%2a!%elN6x1Fkn3$fFwEm!JoqM6StA(#2j~*ZM_}5apiAcdHw&>9xd14X_ zBF#;hD389Dw<3a@u;(h!#}JWU$7ykXjE2QXj7Q_uLtlm7(IWFzcelN726gG#;)$Z( zgh<>oXdF_+5LQ(IrH>ZK1vv`m!Z4*2uA4VwgJv4BtP^>gu$Vo(4&-M#EM0F=N<+gZ zOvd>$+RdFNRDs84+Xy8DsMne!X>oOe6FIH_Ovrs(iX!-T*8k&^bbFg)w@7@lX5J)w zHmoNK*Ml3bhKpDE(`Lsk0=Ift>`;*&pjEEpQq1N{_2vUs4gZ|>^~f1RGP^Oa4_N1# z*%+V!iF^tN%du38Ei$|EUfdItboWC?>YprFfz6MirTSCP@VFA&!)T*bs%yf|q2h9C)90lyx2)j9+28QFNz;NH|snzs}RJysE%{tnArYO;y9t^<@lc~ZvE>p%?_h$))VFL!1frOkkAot z=-UYf=Jm3Reu=9d)YtLO0*viG0CT!AxRqPl+r3It#k>SyV%S+9P}uGq*bY>YR9U<0 zDR!N#m!@xCeNlL9aJu_0F4-`RF(r#ovs(~EtRiyL?*{#h6X25tNW;X)7!1r3I?B5; zUS<{2JeL7xJE7`+aDAeL9t0R_g0Blj^%ajiK?F~T8sX~OH+AA_!g5{FnvaRtx2cGl^wjF_KbpP zAht5WgfHMl#jBR{q$B9A|`O6bZYs4Fp z&&-wy3Xg~Tm7Our;BiBfTm|TQeYbJ^_P*8j)L}k&UF9YrSK* zPd4m{(zIp+3AM|u$*(St8a&pE+TAzGMrx-mdJ?!n^Qp>DNJyA$xJ!iVQ&co6>{hPm zm}YXVUo-bF9u7z z49LPQ&ug;^`Et5e2&D$126X`dnCl7XM*c7I%d>MQOfK|v7Q?rJx9@Z1%{QKTOwlpK z{s~gv_c!wN;V@ty|Mo3tXzt_<=9p}m@<+!2ceYo#4S2K^oSpdqp1VGf@H%9ZjiLds zw(vwdt%Pq1xU(HwZ)+8Te;sAQw4M-z&f$)scQim5pljyTXkpr0hl9>!X}6Ozy6Pp( z9P%UKi))<9Te8;H;_C%>cIKnP?2H=OJGmQ)6pU$SBp#~iSL-n&AW**osJUZ30v_K; zYLPaWQb6%QXx;Ig%plPMA?)s>CnJLmayZz`UxU9l2tI*9Z@UU*xCa{bV+(jt3MX{@ zT&Q{vdz>B+G~2H>OC{{Q^uI>ZsXMX!WnjPYan$0n)AJpZ5>D-@d#cg;!s)$Dxli** 
zQBxy8@PU$F0z;^$ha25>N7HN1AGypnqQfT+$U8dn%qrU2)*U=U>6wooUxVm@Ac&5O z15X|kTKFAKbNFZolzk6osCivhHBsJvn7-W1gCmQj8df~qz~ypbb4@bjREUg-mSt?g zwfiDnSEIDH?TTd)J@}(RW{UEk!94L7c$j)`7CQCDQ7c3AHPLRX z!sM%W%9(Ckquy1;Qi~~9L%y%CnmvE1j8%`RFm@7pmlG;;4T>XxbzxyiXnV)bY?#j+ zhn!&A8?*3@QeaS_QgpaEHR?1&X`d$Ia$o4<^+$*iw)=Vsf z3mpsp-7Up^yNAH1$VxF#YE4d#7T03e$a~5s&&yppxeuEZ)Cr!24`+CXXg9PiJb(fr zRXq4PWQpq|{>sT<2{5_4)D!iz?A`e91?+460(b5~m8Ki}I}p%Qe48!rGK%w9g;S{| z`rLF?6-V%E{hFGs|AsW>@5Ny6+MQkx)aR7wJH`!^?IO+9_pJNo+P3y@$SKyNPys_WHbLonYgKBi-q{JFw>3JW%xMp$V%PQ<)$4q6V8vA~pQ`zXxw{T8wDzl-(Y0L+)ZJA@ zzxOZlu>C+vK&4$de6<5>fb83D8JK^OR#9j8OdDaA{A_PG=UfFKrkw5Q#wd;NXz{Bx z0WyIe;6PuPj&gDUkq5z)6vzxs3^&gJ0l_xxcdg9 zXZGGOl%3a&`TE;z^kU*G&0KsOHJmr^oqPn;5go4jxu<{zhjiROBthwyA^({6H9I{C zoG7lT;{4|;%5@I^?i7)Nm~-~X0wvF7HU|~%_ZF$+g&O@J9pCtCO39~!8E7cn;K*I= z*8Z%%)@*1@`u}ux=HXPn>mFY+WiFKr3k{TH%4}r{MVaRzL#B+GmYIx&GL=kOhDC{x zsliMlGAk7#RHhGbuZS-S1kFfNI#NR zq)hExP`Sh-3I@bSyp>?`RzhyZBolCuV zfT)w9+YdfQYW^OMhnCn5{X55+8A>W-@tuH?`MQ=Kfwn=nWqpZ;#$8LQciU1`;DAz~ zDu|G|S=ZP%e7~(qJKFg$bjc|K56K${7{Y>OI7YeW)6e@JY%65DAT_7+- zF)WAboM|nf(Sl$QnD1>ts(>ce60~ZTAkJd-!xN(LpC_6lOI<()E3E|q>qC`$AMEXs zKLvmQ_5fz0=awH$6Fv*3y9<7<=Tg&ZGwQZ4_AB`DB*G}u7P;Y3Y-&YLwSRVu0~43% zI^OsZ0|nYk!_{Gj95zDFWk$#itau8VJrMy#ZrN-bTsRr!-lQ!Kp;x9~o!iBS|i{?HK|h)Ac9mx-o5c-@a9(=KE#pRe8!+ug5fY@zg$+%StO8 z0{^8lBZ6zbv=Xo@z;W#?_cwp-k`Hkf_dgO3dhlP^Tqh9^K^E|Zzn{0Q02ZlS-YrkhvWIm)2fXG zHnAwgrAS-yQc>}GWLMd6JZGvqfBr;yPVhlFE3FDLv%YpY7f?CsrW7))wD)jBT*UFcyl=8*o+gc+w(AqM%Un9pZ}6{)V-dpr5;=8gn1kE zr8ljs1|9qwj2aFiycg#`B<+}otCB{|^IWV7m-@AAu^C0))y za%GMhCr#rk=D-f*h+_lJrqZnY4j!D{^XCXWT#~kN?`k{_3;rDt0Z%KvHwY7o^iTV& zC&Rh8S`2A^60dv{h!=|CjV9+ejJQ4Rlx<(4+JHI+ls5kSa1>nZ1(jB9E;_=uQ}vg~ zxojk1Pjc0>^~3%1y>1zoO}Lk>i}ec>d8BIcVZLtFF0HRkw+~?5Q&m&|@)313IoQ>r z)h3QLR=SCM5JMw7kE0oTy(^5&viQT2458p;%U-|OgQuld2wK8(7O`cG>_X)}>SuP2 zW`*-!cUe}@mL$$Tc(;|H7BSj-qrYz?JOef~g9$@6Kz{?@;v-AVz@`~l`BHih?}0>V z@dK(`qTYO)6(h}xZ9#AChhGrg$1y4?p=iM)J!bYBb(I;e7S=Z*Jrl)x_BOD|u^%H{ zn`)G5s;aTr7b^_D{g{|0!?J{mfDqT~cU~UvD}9$tYdxtpGfJp4u`{)*4^GZG+8MfY z9)ynx->1?9XFk_!AAB)$%<-%t5ICMcaT3XJ5D0R|RByP2MkNcQ;+EzbEK>0ts_z37 zqFTb)eoi(L#0?B}o?f9NXKjz{e(dJaBhgDUmn4YR@NiL&LvyDaXLYeuC31#|LKd6c zNfbslN3bK)6ZTY)%Q=5x$wBSc&f<(IvRKjXS`yuCdqfxLXlO__P{tj6?&6%BJBI73 z!JSyV8d`BNXJ7jKtQFQ`n!Z(Q#HMxTcG511P>o5mXQoo+!{A^E9x+-pdvvo zZl}jvK4@^6vI_m{P)0Hn<%+;EvWoBiE!JGQ)kiREc^)2;z%PzQ4UOnduUA$>+B9rA zt|+bNklqK!8yK}H+uV-Z7Opsbv3_0|as!@+rSulE?Jeu%LHkcl#t-wy22EM`LOIjs-7WS#B)8Lla+@|Ev& z+`$vuM5SA(Ubk^tA)rGCIs083v|J97%|%78nh1yZ z?vei7)bm;oOODPL^>@QZUyk?$y8S0KiiaXKjA)DllVSCW{bW+kz^VH&;-U*A&X96ggU*sPa`$v zf3&P&m3^^kTV7#%^YN4|n9Bhb=39pxRcS&Ne3tdq7e@fj2;oV+`!QvfH)GWLtMiMq zqykzid#TDVg-?H>xE*sq9X8k=J#Py(IW)+=Jf?7fhZO@mVlv8!s)_5?GawC~sPajQ>i zl6rK8pQ4m={F6Bj)wl*RX#Ps?W6lKI64Y-qMn+(b0+--fGc&V<1w_6B#+c*#?yE0b+|F>l;0n zY&x7^K*JzZcwh_lvjF5n8a5|JjX3mO$8J{f7^!;jmoz6tmQKF!#dSI)27zaA zj&eluU+Zs4R`FPx!4TQIZ6ireQ7_AzPKh`%+bwtHthbH!#s;+Ijbhpt7Zzq5X-oh2 z8V`wY=08X@G&G|j8{cIDOCdn4Q)zde_Y|0<1uST#S_SMBPQCvK^cLl&kX|8fQf#Y= zEKr?f<2y1E9$K>JSKN%di=G^VmTE_*%d8XO(ub#8Kc-N5sv9@65e1J63>*B+7o&e za_lPCEE4e59YvF)!m*7@IDvI#Z@LR>iwk`olia9ixE3~K0)-A(-*si@P8rx^^z^O2 z888@5_h6cFbAq%z#GC0#boOjruRrGbb zT$2y-t9mbkl9qqZ^$xVZywlSBaMdr6J2j83i9V}Dc_cD!=NF|W?ZVICwnHR>Oxss* zy@#cknTdM5M;bp)K*00LTRXMK@8YmuxBGd5c5cCtu~kjz$GCVN&)7Ad0=$D&baJRP zw!iqGzs#NA>TdV(J)9+$-)E|Mc51?F{*e)Fl_GEo#@r9|YgKaeL5awJWnHs9?%EF5%(#eu0!&v!32NYk-Mw?Ct;g!JNwWEVe5FEaJ@(|kfM zt{Al#f_TT{`75=L6J3pr8nyzPvl7KZWPi?Do34->52V`}`4E&#mI%ZhQM1)&7IH^Z zrj{vJgEgq1RvVO_5Bc#uE^+&Y#=ZbU&3UiQ`JYZ9A!9mP2v?VDP_0sQSAoiUzZnW@;oNJ0< z?t(4RR<-b=ly`KAo*vQDmR0UUC5~>k&DG6D;`_+GKWbA#n&wC&BS+xgi 
zwldapMlmCGNWFx4!_9e zWxM0%?_i(PbIoTKFw;gcv%GODdIiv0~7j=wf<|w*zwCVSG{8vr;mP}jq z@BWePaV#q?b`4{bxEZ-Nx%{}8c^^P*0yJkbJ9-||x7VBnGL)l@r~F^%#x6mtr=M%D zzP3a@#MZT^F)!fRd99z+Wy@Dy4+YBz&a5$7h;W|1Oc?FXqi*rklID_P)D-BAMi2OY z9pBTy<<{_ipmiOb1)U3QVud z!W;5#^?QG>F_qTvgU>CmL-2|CNuhOI$T9ZQ)B!(rXNtrgrPB)4{Ms~Pf*o5mJ$5&Q z9_NQ|Bt(6iw-rqc8Yi(jxN98!lJN7#oMv{pYWQON?Zzo_6W#p-H z(z9aHtQZsJrpFt0aWc-x+%1#HZaFxfdLI`AXI~k%Ftx3kGEUs#Y1YCwwWNTx3YhMu49+{q*j9NP3L)@jHCM>viBJ7 zjh=Y5cQ;+6+0)GUSeEMw+pQ(9Le3^v&Cmp`4#y_wM<|!dH^nFRWXp{}W7neG!pJBU z3Ta7j`QcL_^w^DXNRUqGW3}xD#u@{#7xm1qATku-V7meR<%Rvbqp;Ts6gmI23}=nS zF<{5BeUXhEJ#K6yG{Pll-Yd-&{DCcJG6)_4#*FiAP!sY37~=i*&;>Cti9$tI4jfbr zimJfHaeP~NyI2$4g@D7h1vS@l@hV`ap=&%U1fxpX_jEmp%I>mKnvi@@M&|0M9og4J z1@#X7=<20MX-`(`FtiyI5RPRCgD^*3N*aJk4AY>uS>rV|`i<@{5!L)k2}E(C07g1^ zVBfww?XP2FV!Aee^)7P*?qWlqW1^tgY)@*&g&Q603H-0TQYIkcA~xju%(V>m9Am~6 zmQM~VN8QTKUZ{n4B9LQQmy@`Q4WH*PD<=gM{KNx(@=SjSj={?5oRbom#UG_|$ ze=#q0&#hvL0_a313toJGJlx#pogwgi?ts7Wg5zHXLOFo0Q4ztxiaqTchtkXdZ^onw zTWLY8Lo-91bl&{PktEf5Uslo902yq#ui)nIlbfp?(eMAf((Vc&4skbmge|Eo)8)%V zFbtg&eBPF`-=4kUS281*dkh}Y$}Q5BM#}N}Px3c+#6zGKbs<(4$F-r+44h&zB4Db4 ze~m@*rzqH27wVU_Ag_(!|A67X59pv$`_FGnNWy^XLr&@xa4Ze$G%Peczyn>=_U zMja>4&-be7R$14lu%M9_&#sJde<)!K-mEAiPu}cxpy`Fj{_bhN(z!X{BQl1;vHPI| zf&uaTJXm9kd!R{yKq*WLn<9YD{?f+bZU>H%QaKL{;M!E-`<&ht9ROb5Cd6ni11h;O zkPW5>Ro~ODAHR)uxetk~Zv{DM$M;N+QWf~QgGsbs_d*Eq9{ZEmIq-^(#y$I0CEziM zp|}Yp2&gs445p&OGl{nH|NZqC|Cj$Xty{`B#7N0wI< zGhmFt0LP3%ni-DY@d((H;LA<9Lf93M;SYBhcF$tQ=AM+21i`)q!p<8!=wYLW%O8yx z%bEZBrz86xf2Pvb+F4s*R|Hr4h_&@_U*(Bga?)@qV1JSGQ2~ZiVbhsinv2@2Kez0( z|M6$3Wdom^O~8wHn82z2{nZ~3QPhFZ5#S;Z!Ef%*rQmH)7#`TTBlnL#bT8Km6t#9< z9Mb=Q>)&3lOj!`2_ZJyFRDwVi{hk>MqRi9=uU`|Cs;N50`g7G z&{x>SY1WT;M?en(hMh1RDFqY>4}gLWt^kFQ8h4*a0y@&``T#umTtw0Q4FZ1eHJW{p zM6@F+cX6B(GKD!`Ge`caxs}7^%NcOJGEUjHQHT^xP^802QR3o=KI=cM$K+$R^Ns*t?PX8;)NqXT!Bl(aLF!-JHMcS9Nwz&5TnW*3}OBNfRCd` z61r#%-qL6C;r$0s+{eeh&)kv*y(SiZsoqm(LuhRd0cnD$Ov{eCCh_bNd2mb7-G%2I z8ex|ya3!+$C}x;BWxTFI{}KG^fjuKg*Ks_-%cS#UH1EI-qwtU6Gc2) zgrPqIuY70*z+oq}Itrm1Lu{elQJx@cX#=ZvI3w>Y3LHQv5gEl`0ChqOr3O--(jlAv z=pKX0auf_j`7oQIh7C$89?$lVsKL)r;hxCq9@5@-kmlUg-#@Kk#mhMAkqyx~MDPbp zwrRD;Vdx^-Lf_?rSJ>OP25@E3z~F;K1F?k$SM|}CFt-Sup?lCOGblB`i}a%P^>xfS z7Hnl@C9I=+0WC{4Q9D;^3kRZteJd%@?gEpRIn=6U}jCm5EOwE4h&yb-#$m5?2s z_YH#Y;YS!Zn5j|Z4+ez}__1Nd-Va0n1vIJvhb{snTRCJW)5%pr`ABg3=!32WH;d*g2+zZHn7xBs)k$C*)PX_(12vGiN(%MwwMFeJw~y#T!Pg^CJ9r{`;458V*46+%at}PoFsEjNcTywF r>m)2Qdvp4+|8}ze|Ndw1Ho0@*idgWuK@oTz7v}s~EtOKGOA-G8N=$94 literal 0 HcmV?d00001 diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/images/hello_world_tflm.png b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/images/hello_world_tflm.png new file mode 100644 index 0000000000000000000000000000000000000000..a89fa086622a88ff20e79fab1b5be560f0671366 GIT binary patch literal 27448 zcmaI81z47A*DVTyAOf;LL?o3Iq#Gm*It3(@2I=k&K}t$W8Wa$uq`Rc0OQiE9rMqFz zcYXW+_u1$C*IC!)T1$9%-aF=f&oRdsli*kK61cZXZlj=};7Un~Dxsj<;6p({O}T{u zU!j!6;(`D0JBX<}C|esjIP2LNqR8nv*jQLQSeWWRbTYKFH?_9nWZ_}qWO``g;9z4f zz{+a*-w&`@+ZnU=A2uAqOR#Jt-`Jy|;OQa%pnez0Hbp@(`z9s&{IyH+=CrHk>%*&# z-BF+5Csi0jb4rOa)!%eYjA$FVM)NhL?Xk++qzg5wGpDDzz76fw(0q(FoaoiiHw}>a zM)&>A8zZyCP~W$7-(%^LXDOqV5y{VA8R@x*P)6*3IrZvKK263-==V?y(Idv7f`3qH zevP6A!9Rptw0!V)0-KmQ_?zTUMPF*-5v zLH?1D{hM@bx^yffA*=Sq#~&H!F!tod#YLOBpQyfme!-!knJ&8vZwXZd``B>$Ql56~ zB0BhO=1|JbMrs!Iy{i9RBYF6SguMJ6MMcFJMwPD7c}aYwl6O)+YW^~MovqO$zZ*-BlOHB5 z>&8^|eo3L)KchQDuoH2*rAA*Z_lPJ`# z_mT1Tp{JwMFINAqQTan=dT*&4-t@XqTd3bQz~6r`T@3qAfrj(9N^dC`duDdFJtB;E zdF`UX^W5%u+F56i+oLlBt1mmvOS&VR92Uc&i^{%KuLZ-ZW{vK}ix=PDl+pzU2eYj7 
zqzWbd9m!Kp7sX2CHW!zcmTtmi71K6jEVD3GFj397`H{qhCE&0lr%B}>R{2(BczBq+ zqnou+*2Lty^7zht3;BxIUMIc0W2td}-~M_Gi&mmRM|iPeH&OQ9hY#qvss)MJ=1gzP zf({NGcq}JGVbfLzzM_d-on)+4dtaRzry6w2+1Ri_U=coXI=%ZHdx2zTVGw<#CxMg8 zVvMc7kq`3@*%#0AqbaZRO<2n4M=t{g8oj;$AjpN zGi@n0=n!-yCHSuGfp7Mw%$Tim*2;UTQH1_SZLn4w>8_ltHN|NiX4+747?&}{E*I^{ z?tx0)7cp$gc2=cZG&Il3&5r)`n~iKl2+>keHQRph_tz^{Pwwi9VLbHGlSy@pj z)^Br}dylDJW%D(o$EdUz)|LXkAgGn0S#1X=!3H*j!?=&k>-y69&krr^F%6gHo`jd# z@+ptCYPh25mTMZ+p6u_m2H(Zd?MdYBR`fm`Q6t5^ z&%$-IJ!^e&Fs#rHYhm^wAbvp;s&cH{L7pX1+{!2%85 z49m$1aW}V`$kbG;WFhxI2P6>g92D*w>;}hM)9oMcJqd*i>j9!8!g)$3PqpCR$&4GP zaeJN2V;bx(v<0;3vkPCH7z`iNSI#zGczB(w_zssC${6g-HOU(yrvkSrZVOu|oHmlC zrY4rrwdvZ*!5n4!%F#~h#v3Q1fSZG_weNujY z{&vSvE#tcDtMgNNo4#az1Hq#)192OhqTLZS9sa+8I1d&Ml97uRO|Rg@=njD&%BYe{ z3|nOWC!ZZ=ZA2#QF{XGuC_%ZDI2TI7-Su5DBLI1w5A&TLtQk0LO^xSlESQH<2u*y~ zs;!8Ojz;hqA8m|_CvX^sZfsbzux()VF;`A4IHatunjmtrQ#m}ReWh(Ka_=%zQ~T}g z*kqT$zaaG>1M2i}RHw7Ak4I<)4u!M(G=p;1&bIgUBsF^orTd`*R=$5G%q zudqRXI7=>ki_r%rb9I8igR6|!X?sTQ^=rb&$jI=sv9U2tgRyFRdS;D^#oW9fUCKR1 z$nRTNk#9{HBq^Hd7XrmL-yer%at~*Vb``kN6-{Q2XSXjb~ir5&{erzb|)Hs-CcVsK1EkknS zJ)W|U=&p{dO?loKMa#PG8i!b*>C!L$wBO$|igv-+6dV*Zw>p?1ZzvH#LCeJz6GbaS zKt?7DQM1Mmm%UXXDFML?Ny#7yeIk*l7<5XnTd|Bj)Oo?mN_KDq7pxY%dQ-hqDzZ8_xbd5kBAGY!7`9z0OVPePIhH z0`>rNvcl3}d!`V){VB--km2mk5Oyq~PIm?~URPNf{YLFn#JA9GVq4MaCB= zyExTe?R`&0n{Ctmw%)+qvugc9-#KTD1=34??VFaiyi;)s8aOFPW zx1+%mI{0V(Pk{lz!~D19UAAFgUj`1r5EFSUr6D!3HeNt(O#Z6V=#^S`Z&xC25Z4ez z;?~Mjt2t+Bwg`#SPmTA>uyFk~tcoe|gij(we({jjoiSRBp*(f^DRT97C1=Tah{gqh zJ64usa~XS|A>Hj@bzbT7=g-SZssP{$9S+M(hXXP*X;R&{__bZ;Zz3Vx-`_beudgmq z&oN@3sOCQu7EXoimaA6Sm;a{JWU``Q(p)uPJ<@Ug54(Qrt+(Z7Vt`A+NVwD6bk=on zh}nE_NqGj!OqJ!+gnQ2p*R_b)>y{JEPj*`H9(UMFeAYWVLm^E#KyOUKq|$7pYl$D;$?@?%s}(Zox2KDRX#P0wdQwQNlBddz{Bmn+ zE3uBpu2{pu5Z5lsKO~9S4Ab|$D5!?iB`Gt=pG*sXbK3N%y9#{}fKP5o93gDhvsuNV zpxi^=u{ucMt|5B+#q-{Df@-px+#y~15YH_aUGxwLx3ICPIXR;tf9eEMpX!y4C#mvS z{5s@9Bkt$a?8OpJFHnBc+twBHzH+wVA~slASx-$?=d6Tp+rZ`Th#$B4??h^^HAt9q z5COlxDUV z626ZNCQ3 zP0m{ETvEoud{sW%8`5zBmZHOnVF$*jahj$39hCFw4DnP{cCJvu=}5`>yQW0_g0YS+ z8qsd9wvdvwSHA%~=8i4xQP=UZK65yGzWK%kc5pVMDUH3xRsp}rbL2RvAOW(ag(30jebrlpXx)|@Ux|KZ54KBV8;FP z<8T(Y?k>R>g|tRWXJdZ$OdDU#KQ`)Rw^$}T)kJTLgtRroohlO1Ab$9F#PxJ3PUwup z|2+&a_yhWRXhx3mW-X4lD4A`3H+QvEx{#$hls-q9Mtx+f6C;gpEvBSo%kZ2UbV&!f z<|d~!>>5tnXYy&OPAj0gfD+Giw!yO@Ef4(>+v6tuLK!*##tH@0Idr|UNBppCw>XJs z`GU*1&OgihKgy|A54>~qVavBgomJ?B?@9N}H67Jy)Td8B?3YaG^Y{Isl` zZdVYwY;C+cWMv}4CJMPM94_q_mI2h2{9|{ooAs%sa6)2Y*Y)M>wIciw0AiDo+qZNd0-Fy8o@t}$X zY>}O@vDg`{&P2p>VW4sXxy*J{%ULl?_Ca$?3o_%MLPjJ%1XRv)xIUWwt~2U2T!vLu zd;s{OSKRk+(()~E^N`{Y?;Pv5vhLG_owi%r*)u+li7{3s@N|lv4KJ)FYYCjJ9sAKR zadpC`DzJmEqcrOowLc#?P@GZuf{=j^3+q(@39{}re&O4pV+YCMcaC!4M7jBJo*F9= zi{>wpN-fQDvwK=K4qst_9uP4!^z?V{-bMTAzFi+UVzyMhl1eQklyY!zAlSH-qny1u zoQ;>}eZ>#=)5&+4_#;sSTGJEBs8YD>okv zUtE0GaIq&dQDG?q^)bK0>Z?>l-B@QdeKjKdsmW9oJ2GWL(2@zc#aK<(pnghC?SrJ) z{}NBA6{@rGB0V$!KzLq_v4+pLZ@eWo;kBKGJ8>JgJdpix=&4P1=83?&h&;{l)6WY3 zB%Yru6(knE{7m)br_)^pz~x^auFTHQ>AF{Aa$C<#xxhS;pkCzqV)*;(oTy^R8xo#? 
z{RUycmHmZKL+=jBu=2--QHso%j1_%V9+zRQfA_k3Y4)9yQ-xxM#T$dAuGnynpFo%R zPp%;Gw9D$(K(dIH-XvpDad8qT-mKE>TX1=a3+whinqELj&4z-2GT!0$_1StM#+-`+ zoou}Blmz^(@#J826Z_@oBBHR5)pY{O_bK`nC=C$_E zO95UV&k4nc+>=$d8^jlfv{*j3-<1UX+0wA{I_nh9*3M)btG&}I=_vP7P41;pJ zEyNjSk#jJ&#n>k(RFoUNJZqdbyUYvg+Y1}7+{O!6m?uJ{1^bLj3m@a`{DW23j&kgo zK`Z?;JB=uKaoN{-=6xc!-rl=&MD583JdSj^wI{N=WQOk@#;2{0+pT?1KZ7AnYl?ZT=?{ zj_H*1tf!R@;)VV}{}^jH)KkL_8)K38wOyK)P678mg0ci6zQlFk2w7QbyDr@WeAoi3 zkfWF(p1R#T`P-M8qSCpr-tvsZ!kd=2`e2epM>{hr37byNm)+pXYbsd9V!`NGp~|JK zBuJ#>t=^S~yAs+_+WxYuMa9q4{S_qoOJFy^Kw?)i9nZsiF!j32qe&DllK~$%X_cc4 z+I4&ogYt&4Pt@qBsaqHnPgZ_v`9OKJyBMu71a*{E)jS69Qpp&9m&Jf`LzdaFEpR2Pb$!;9D)e%He>!fUP=!Xf&P56e|S^dwMQ6yP=92V(9TaZmeL@J6z6z>!DLFYqz`KBM1H3NJB2!X&Vb>D`T{+-# zLkxU0c~dhJ0=d~PUup1TrH0JGJ#(xZi_HqH4rHnx*M~NvpHJ;NZX+*vWj7PGX&f- z2h`*$8GxqS=bHS;S2QmYXFZQo@JUI{kq?;s{vNrz+d=shmxBkV0|_!u6T zxKqYSqMia}!v~x+x7qJ6Fiqg!KgRzDP#3T;?#3%-Y>1>8m7MF8(Wkz$L$uSUJ@dbP zWJylPK2cWNb%QvXezH^wh;e|O6ochYHYRS0D5X5_VHe2}$BpOiu6=)xqw(T~A7%?5 z-7c1Mm4cms(}xDnda{(Dplc;x>YFUw_hvjt8{Qi^dU2%vwJ09T_Uoptzu+h{$8?VB z!HZWYEJtZyXglK75fzP@lWZd8zFjfIHHypSy2;8QmR?G14F1_z&WE}s4SV>oo zl)p^R$kxYRa1|#hOUeF5f6|#YH>V5H&;blH%43yWLL!+TlZcO+>@94_m-9JoAOnf| zq?w2^P6{iWFvx0pt{M?N6<%8$`oLgTMCx|v$nDHcD*7%*d@<~SP|J-ksJw^giJs>h zQjwC{Vxpobd{Jch)(mPtk`c$-(k8S91@AoCl$7V6$@12eOwl&6qsUZR9Wj@-L6zSW zFF0;#H~v1>LlSeMarme!f{LU!N2%2f>*JEiDx)f)l~ixw?~u5M;Rycb+1! z=gWO5GC4}$kVO63={?5R50R99zEp)j>ZEh2`gY)WI=i~;1QxqCT`lO0UN+&(on(&I zk%yxQUe}ox-!$nYO4Ogt`y0Zld*;B}4hPDRLe2)s(v*~zV&UOAGveXl(NiOa&=i!F zl*Y=!fp8dJmrdaC2M(_T|K2^vQ(jsW zGCAp`KbD-DUAVg4+*;LNt;>%62;9|Tlf?`(;9~*?V;YU7O8br^AjX=l5v_Du?%-+k8h|spf zPXl(f$YD)sVZVq0Q9i0I13dazrFELACqN_+K;#28>5{^duh5F(X?Rz#vX<@kSqOGZ z4&e6a|2B3*v^2$Oj57&nP_nq7pg0$OX7OyJ_t?TJVq}YX(3~)eG^o&e?Q*T$a_HbV1CX%ZjvbOS3N86S=e6h;#)JC?{9$ ztnJiP3t|-8=_CRUj|=OMNJ8$j8UdgcJX_6xoQBM~r~|dm*1(Z1?bU%8hFmj7)%?}h zUQi^8hY~m4*YRiqjZjKnKFfZk?|1&2XpqQU&YV8-B!zM8Yf5Ap8R%UuvYI5gto`BH zrOLaP>e5`sn8@lHXytKLjX@eBsk?Wg{Tcwr%E>Mwl$19-G!zFCZYw|_a#=Y}zZ%TDX7*BR3vh}eCVwYs8WAUle*9Kq85Cu!>4yxjkL`3uA34JIwi8S@y z|1;g~qAR8rO|pyK{rUOvR+h^CT7OGjdoS-x3N@>L@OK|HPUb$p&7ah3X=i}a$lc>9 z7<;0=zigFfWk{~8bDy4jL<2QA$R(A4th#Je$2n<=W*{P9bEI;9)b2ruauVgZnAXpk zXj&?Q$1!uiXvwd8JaT&(r1$iv{YA}B!mg9Jv41qqKm`0oxC>`-gC=%aBq}N>h84cZ z!X2A-lxCkmY=v@M3c`jbevIa4Z>G^51)_)Hj)ap5U1P;FxKg66%D}+5d!E0L#gRL1 z@ViMS6^?3=&X4)m>D9WNUX+Yl>`fOx%y=%TN!}nyT}ZvlO3Kw$uI)G8p?Ru{%JAeI zX0au5V^3|gsU~@PJaf04eMp7+94kEje6&$E#P9vf*Fb~*c2icjoUwRofhw@`==mgf ziZrk#P+*&#IfTk3$>x>3FAgz_ieDWG%6tfvy-{dq0O=78y-RtPahaOX(4Bq1w{K#e* ztndrwN2mtNvT^!o@vX#Ge@zTqZ*2$CrYhTZ*;E?Xif^xHsFL9`l)>v#dND4~YeVbV zeU_#=_~`a}I2&%s3Ni{L+J_}2s80X1x^p<#G^Dhv3uoD`^pRGwVJd_miP$c`UQ!Cg=@kY4IhJ(`YhUHZ1TDuIUr-Qzmmyz*O6 zP@}RhHJeM(WbQwGCA#l1h%oDd?{-?))tW}xPgbm%!Bz7@0!v;oE(ZP0n=BQKfw`Qc z)(&@Gn%yuoJVAq+tH_&tLz`08fz(uXVli`sj)?XIBGR86TZlk5+&TtEs<(frI^?G1 zFx9@_lMuaqkCsmiM1q~AxGa_RsxS(sm5n#79W=tqv=d!jW(rm0HXiyTMAm^cRCeR= zHWePyP%_iEv1kYLxG2oyI<2BCX>HGYoJm=)dYxD$;4@E=PgzyGWAllWU1C$6C>#8} zlTIfbU6g76iT_X3F>1XiQyLIGI zL6Qw`a9Y?%m5B%bUo0^~i-%M+A)@#fYHpNxU|>N{%KCIs>-9zD+Pkf3;r4HWq_^Z{ zlrfm7wByqHt;^>Kxh*Ga3f!W#$=bWrEk$U4Cb+vL$&=z?T&<4GPC`U3h7csD(=cE) zT>dFz#3b$|Tv#}0)7;ovwKSv|mdy?ew9w2b=A&8|h1H;~w&-L_nTJ;`c4~)8TqKU% z#vcwS4fw<2NHiDmyrqsZCP2wW}VupE)yHM|P!VJ+$j>@BsAq~>kTD#Ei2Y?)BD8}fcI;_u2x+kCvuOZnpZ z8#X6#-{B~fHQM1)xBXRx3PRuIuBnru@L28{fz`$}9XK|OyLMMhk)BrYYpT&2u z|Ni0*TpMPbzHh3wlnqe=8-=$*;QFy!ibY4NnaFAoD8PaIH=IgJN>iyx@$o@z`nCen zF^}gRKygLh-=Lem0{voPVPUM+xiH&0!$w-pf2mt8Z+Vw3uz2WOh>^V177h<{UBcQ2GsRW;8Obz^jG zvn2TCKNy0inF0g|dxurNABN!BLQwdO_?m!7`zxWFu6t`>FGpF14lX@GD!h*?kv5OD>e)~h(nG}3? 
zKC6A%5k&K&AFuH7Mdra10y3Y)Y(q*_RaK)7+v6Z0Co3$*7mi#(lgt9W1}sX0)%L~> zmmLcZz;HbTqG2sjPfrgZY|kb)2=+3ns-K4yX#_9JOp84`Y|eXf7?kTsqX?~d@7Ofr z(|5<2i^m3?;6~8#0TsuIBrKTOQ0C|7-@SVmFP!bPIl-h^_33!lJB@eNBO27dL|*Ig z%|hWT?Edb4?rhnF7wYQqh#w69K4%z zRc5foMQQGP@rE$+@O+-BcOU+)NDUKc1C~<)3Wx+=>ls5Gdr(>485s$Lmz9;>2pv;6 zbZ%4|7;1FFvEHnCRv`H!kvFh2lw0dQrccl<>j1zapC#@lLs^VZM%Du?T1r<0g^;u; z#q~ehjw34IV~7Xfj>?xBmu4RD;ln@R*m6KG`~6zIv>nc(y1!>r(wI7HI}K`+ng0=9 zQelVzio2I`^p~I$Ojr-lNwc9Yr@1zWjQx-78QhD=Hg|I3nf2JmMarvR0Oo-0g-?s~ z$&(PEF_upgQLTSJ9$p~RMZx9^jWr$qEkqk{WUF>N^gS9T39>Hd;NXA@Dio8_W5DHXEt`Dh8k{mbU;r{e z*8gJ-p0zMtJS0)jUODTv&bIM|Hd{k&GzPbFhmPHyN_PC4t7q!O4kz zWOVr)+$%n7>~U8Z#8B%u+)=t+i%C&*{kGiTTcp9!Ige3j;Y-3ul$f;sZT(dFFE!5F zec&W}z-vYJ;K2hfv)@5bCq2c$q7-ll`tqfbG|KDpPz?iz_?fsk1~3-LMYEWxOK>~< z%io6-MnUz|0|hNv&^1cKs@flZxR&XbI!*NhcLNmcD-trd{a(23ZVX|w{?xE;I=s;3 z$i5vf_<+JywD$wm0w+-m+msdi-S7W>Q*#!zBQmpI$cD-kqtXnf%8!kBl$7&# zp5-oZ^V9CpBM!oGPA88@Atm<@>2CgV?>w~(X2oEuVCUMb4ri`tYaXN?BS_c8 zdT&Qk44G(Bwd*&!BVjlrw+r4jNoFM!2BRe^(o$)EL(gWjFWu_(|H0rRpHfF0L1@|% z(4=jaaEAwDuI+w3>8BteD_GG%A;b5lt);UJIyv2l9>52gq~zsV+CDnR~zSVXBK^QngUh{HZ5TULEvu7iTFMJg`$g~qi%X*JEYn}?_&K8yL{dp%$?KIt z-L*^cW96m5Kz2kN0{=7+yMXWR1LGRHaNwlho+yY4%EM#Bm8?dW3ggenI#GV-uT6gv zWjSibE($XtSVKfDs-s}w45*Xg?>>f)l`_lq96t|9?pRPSKdG36d!fO&d?GtcM3gEc ze$mZW@81C;pX~hdJgbdzAlSbZ?WpaK=tWbN?NOSQ8jsWP2qTr!Kd@IsJV}#cLtVR1 zP!8888@WSH{8mj?hcREAw3G?2cObOGB}G3Hz?;NaII{`XIyNg?Ms)!8MaoFYejt@_ z;v%=ubb1e!RM5)fCI%+2)3!3c6dRW!*7ZNG{1lOb0$OT*VPdW4@JSe0SXl^byP|(u zc*Y^*lt$>^?eufn+z|0@?LE6` z-M9%yAoJHNU&J<#wUd+;3WY(1!SZiJg!|LSk#*|>sBoY6a?y9ZlaB?{tHb)T-!D;` zPYg}F&LKnzL~4kKx;py`>oY-(lVw&a%D9%n#MEi0G!Qx+q7bL+!kM*V~EAap)jzAKAu?P0MYoG-uqj*C62E71t5 zJ$jQ`@Izu1Of0>c>C;Rg4H6Q=gB#HbnpuBHo-xXua40=FlbY8rpHQ2i=q4O?v_)B+ zolSZaEQzA*izv)e=m0V*=Vk{A!KwsjMP%fB-cM1M*}0}8IlegiG}ow7qp}z1K*PAak>}7s*?&ZAa4KsXfp%}ra;kKUz^;a;GN{q9LNkl zN{(vIlfXOgr#;}<^c_j1hNwYOMEiYD!Q2u9H+iW+#~l(L3rWB~K;Ol$os#lc%z>Q? z+;N8%<0byUOmEfg%ip0C-uRc1l2TA`CbPyI($}*(()xwU=on-;$+I0mt@J687c=B0 zqg``Z?2vkyB!tc4M_0in*WI@JNI?r;mjNt8Ps-Cc;5&+K=3fl&$1%NWHi!^HTJ63o zXWzjlVtPnV9|(#`_C9DKU}v%+gB)^vP~Z~S8^G?}0C#-oJH~Pi^FdOEseILVfqTN) z-`4oZuuaY}@T>KKC;}a=15~@caXUakx%jRg~4J4K1JB{4-bVSjyAuD^Jpg#{G5)P8e^Z`H}J0V{BUZYpR z<=S%_gN6#V1MJ29BG)3O{GT@?bAc1+?6!%)Ne9zCHif*k;wV+HW-{z1RT?WZkh5to~EItjRf5su&-<(r`hkv zkmkuH-o8zY+j^KrC(ordw#|)U*Q#&WNl++3v%pUNv{}z?yg_eK5=lpOFaLk(C<1V% zuiFE}>;peme+Q+vELi*OgpMc73%|imB3)5s!Zb7=K&s4c*&eeX-d|2oEkNAKk{?M` zCQwh$nE#_;Ki<&9#ziC{%8~>a4MeEnOsTussLl5=a5~!CYdmK_c1U2;18YWMmg#U- zoCkSQZ#(E!FTm+{o5m{5X4own?e!W#_s;yx9m&@H&rHSEvqc{;dlXLQ(EizNETS4=*YQgXm3ZaGo5d$8#u&Qic@Jrwf!IB!k-#<2e;sp`;p*^G;Q za7~WrVu_1j)7nh^Q}Ce~PgU6>fe|(?A(vP^u>?mO#fh{_MJni>LtdBzi?S`B$eQ zhphP;F)Wnftgl{g@jLrhDs}qP840G4Y(;i_mqB#wl;j*K?;Rty z^4~6v*ob)ve_Li2s9tjXDAUa_>)8yy|?;u`+0Aob_5U>^zey$516%TvT+^;A5C$JQHOk6 zWz-n$LLsubO1H!Qd36c!?(&Oh<^b}2^V@w+ZWbs{Ogrk1q!RgQ#U`jDe!f_cTE6c2 zuM5CGihzuAY+s+C0FTvaE#0m0sx%SniI}zn*TEW^jLXlmSkCA3O1s~$^9*^@9Kh>%FyrmoY7I{O-B zn0Fe5(a~ytGf4D8RTRCh_0=#dn#E8YDt>1Q0U7JW@tKjYtZDkiTI*# zs?fYWuivhM<>|zI*+i6rhpNV>0YH$%Zpl**~cVgZ_fvflcsNe&y@71i{5< z+Nv%N1>aeELiSH31naQDGxnS#nc7>&%De@n+iTfG5R6v-(RI`F#rOqAD?e3bmkZA(>V zjkoVYh@kYY@MWuV+PW3b(;GM#93-OTi+IjMs@`}xP{xA!{Zuy}pdmwRRMgx!<`IF% zpf-ZYCp&dW*8txZ7TfP?WJDZOlQ)jTC!bz?a&ktDrhfu2XQ|}(4P8o$Ix(#Vsy$v> zK8mQs+jyRXX}Zxhl(}W;Q~6Tb5mK2&=T~J=BXbozy=)Rd5@T1gSmA+nyo|9G`L+za zK6d-u+Rqm4e&^nde&DW1AFGwayONV6ElLLa;nR!ZP2+7OP&IbrLmWS1^ONWe-;vv! 
zlocE)^DK_fi7#gI8u8Kz@Vzt9JJ9IlKip1c(}`z#e9O#i`c7mwIRAR|i`M?O)AIZ% zKE?}?jLb&Q_JN)rv(zwlS-aDHbKUo+``lIPi?`Ja*j#dBmmC~(N>Q@UarV4kQ7t$F zKrBk&)Z@A?js@c#G+o>dUh0Tm*)o7qPNx0EQRE1bAF6R=QALv#FEtr zl0kS=-s`Kyyzy;D^jQkqFlhd7-E1P$~a{~7_vT+3I*uH0pQ3e;&$*D zeE(Vf&=~OR@B&+8-@X?NNRbH%YkWgPLoDlFc82B6zBiBRvwmLLH{Ebl2zGftayySC zls$!m0{tRtv1Q9VGQHWz@4k)$^YI`>D0~3erRAZON>X0lDQ0PL@e^Q{{3Nk$eFKA( zK1_A6>mx2e4*Lfmf8Ie}Rh0;0%F(?Sb{M*7Qtm_m=aJ^+d$_(SQqJrbH0tnX;Qghh z`P<6%VVyd%FiHG_gJExHxy#{!ufnG#Iz7XzY~T|Fzu8M@=70{Od7w0*rHk`meH70J zPKkXl_b8iT7aorIwB~Ma0`NH04PM&LhVgr8jsb1N4~ySR+45jb^O-#heXhjX9%+N?_8CT zYv>@DazTWJ+lz;jbtZ5c6Z6?Tq*q8?{H$T62X$7=W5wp-N@^;v{88`3=;6VI60NTK zb&93Z2DL#B#e!TaFpZ3GRinnIa>m>?&lg@BC?ale%Xoi72bLG8RyZzhKWe2#J@oZL z(9bL=B-#4pmd4fjKHCK`4w~uRV|X=P+fri4(#HQxW$4$f3cDTMCI_pYq{0PpGvdrbMJ3XQ%OF4|5(=n9L~#V2 zB1lh>KaqM(mT+MtB%-Q^53{|!nJ?&-{!voa8)1yo$9|Q_=~I8$F7cm_)HO>W?vFc` zwPC=!(U6+FcH}|tmU0dvPddTyv7$dtYoD0`-+m4#rqEU!B+bh>+7-4We z_c)5uCNFcO@Ch15YCysfcVO28he=Z_{^jOsB2xk!^ z`a--y9x=V)xv7F2+?VMA-o(qc_4q&j_E|x?h-+na9VuIzqKTcZg;?U@;t;cClQzQ&)rZN=kMQtwRkR)e0<_ zqKug}Y8*~UeyaUm0q$+aWBC5_1mry4_nWnq%yEO`H=gWjd-^JogC$CBa4?@~wj*+p z_$U38yg@^jz47&GV$CzyU_#v8U5kjcO(im%->JW4l-tGj9&+CkxK5PBVZfQE)t8)? z|AYDu(K2F=1^n!yA4Ba8dz>FMhK2P|e)_v)B6>Rw`=w0c%@@WH=( zU2T<8=*J0auINXiyx2>8X4$;vVdW7yTD#(6gRf>PRfs#^RdERjpmE~S%ME6MD`a33 z^0&DqIV(?ht<;^BaggB${?OWda{Rzs4yV8KeIR8Po5x)crK!ZiJ?BN#nx2)@-oI4&l(Wx9vl6$E(M(m~ z5vU>C?dKD5UEfyQ6m;UJU7uW&#o`i%>fkcgwKK0Bv0Cz<2R9Z6vk*2YTYWCEnKXp~ z)C(FpImD#AiK+!FciO`b?F7~SjVVjjC!cea<)V`(Ph-^F%)J>I1nw}O_LLpcVSfM3 zUh&F7P78U)-?iQf=e5{WvOrC}c7!O&{KeO>`DN|*j#OyYET{kD+NKt)J}_aff`=2? zuKPqS{N%(1X=GkMRS?`o2S;lb(qIp5gvg#;kR!79nTSHVi^1}uF!M18y(*8gVYI9c zU#HkI9A1AbUN|L*r(U0fyG}@_ENOA{Ts~pf#24vjZ@4_tfc9b;B_#q-_88DG?>M+$ zK{ErN-Qq1|Ee^1XhK>$dhc6%yT76n&CirV>YvBlhTt{;$@Vf%mO&UD?Zhmi+sSTC! zDII4M4|P~eEkRi)#WNmZ)IoPNWD3x$BGqvgL-r%qyB&h#=NnKIDEZp;iJ*uz zpO|>T=`Kps*f-J;xgg@Pt^}nQL$?=IMV_~V!qUqAqyaOZMuo*!XsZri#CQl^t1n+3 zg3mN@WnHr>7`wCgCh#%oz{G$>sR(E_yYAt>|1I@BqOvQFVwGdJY&#W+O5h&K^1r5G zqouWtl>+yPGO^g}d~X`w)1_i`dgvkruhv9PJ~-%sBzq6=Nl*&`RFhHj2P~H9w_sc) z>;BAausKma1tJ}?0k&?h>xoq%CnS96%^;I_{rdHlwWR0`9S1J&vYO#OUs=me;~`%>??-?r)wu8z*2!eHK?ssC0^Ht? zx$ese#wLU5H^JGv3>~lfi@zdelX!YJ%Ry)pGcw9TK*L_`!igviTdW6XQ-j8y7=<27 z`+=!hTcl6vWuAT(bKFYtt3u^RW6@1v9`t;``XRfJt0_jc-I)Q2kv(@Pg?;r z0?OUu83{C}r$Y-fvc3G?6IGiL6}2TdFTkZm;}*zv=KrR-UW&HHj7Sgc=&`gB&D|6? 
zJIBC!yq3eW#lM#;Bb|mO*lg}!;ePwe*f#}R)oMFK=pOax+y_v`@480=zB!~QhLW#Q z8IF`txy_f4TmhdVfj%HJVIZ`i9&C{XBWXlS;1p+l^1u8Ek_NVnR=LWix$ zwVg;naA<_rHkhzzR-=sWSLWri;_^SW4;u!Y6y9TY%pvOL*=mG!IzNGx`OR1`&yngrZe7foebGgtT6 z-+cXM!!EcA{p;+=euoUGLd4;>CJX`}hXB4W{0t5&u@AD@qlFE}017aSqxX|69iVz5 z)y@dvvtLNBGO|PGUu!h%Ul{21$X*WD@F$#{!zj@0-Q!!Qg^xvJYASw*xLr)*4zZjR z`_PaF`Ild@r(eeoSsG_Ch{tuwq;~stcA%Zy-GZFVJIfKWJgZmmx_sNBAsmM5k3k8B(_V&>@=OgdoGol|S4-(~e+Zz0fBo0}IfPg_!xQQW-OwPoK&szDVCL zEJ&eCq(i~5`M&L99f-h(+$cH~9KI}jQVwzPWjA8mO@t!yMiFPHcy_33mSYFg{8Zw= zXf3KM^r+!SYR0$@d1NjnrkN>Mc4FZ0 zAwBsxoUJ#-w;f1hS1s($9!s~H*-7sQ=b$4y(NttwqQQHcIy<-5J9@(bp#|M6Vxm;1`?GpU)8X~i zPLp=91h&W}*n5~M<37xQ`1)@5lHRNt|C+~=r1h%kSJ~G3dd;iv$M0-jA}6LzL!*mz z$BBUjgDI^$?pdkucX5@FNO zyNf(#MWRSM{sjd8-RloB#W#t&(JL%=VZir$$juSlXZE{Ya{Qkkx@?yFXpU{`Bln6$ zd7Q3XuKQ_GR8-@4EC~;4?tA)24OVBUX|J8p!5=0>r-KAjcGUt-T+J(?viU0Ga27UR*JQDZdSHCcthh@OS~rcmJ8Aypf3VHsRRj=0wbz{Ux8sqg>pW~PgBnV8|6&YSw(Gb* zAi`FUxdIl)#rdjhp_ekku3da&`20Z)`tVbAJZM<{ay;WwPS6tjCDnl+H#yIm7k^H> zx=3Yo^&N*IW8_E!i$D0|8RdpDg}42Y_xHbGMrP$%*^zaOD>%wJH=(h?TtENm45P*^ zqD#rJK_N9CQF_7$biUwkTgXn}ImwObt)h%!Qt@Sh8GW!65;m7~nN#CzrKsM!cFGw) zon(OGJLL4~+*c*75k{c`3Ukz9HEdC2)i8uUItzW~Vof7g2&2Cj)nbfX16~DAGb36S ztPu6KheFyf7InHZUWvUbJI_nFFO@O-ymciAzek33b-Vs%AZy7}MS(7Xqc4ejyYxsr4 z+v7VRDtHn$^_kEhLo_r*Fo86t*Rno5_~6&mNRo@hG=17Py%d_AM$<4)a}>-1x7sWN zI5kYp`BX%0wCOw;?5R@RVuHqvtk?SFpk1v66cwOaEkQkS5$Hu4a(17v=tHj-1RSM) zgYO%XuCk!Tn6FH?^WUqEt*=*Zj9{K=xdT&PRlV{@xyt6}Lw8Nk_smfTXOk&a^V_D| zJ7e^=KlG)qHKMGEEWqzkyuV*(ezXIkakpzcML}syzFllN5|x$ROx#dHi;T@0_lkPMfiVK4NO` zxJF+(-b{D9jB2`TWYw+B@nEme=4G$>viMmId+=#pu5g^P9w^C9<^Q{cn{@&S^fH*c z%K=S*SjcPl)^S}f5ZcsncP~$U53aQP78Xm;k9&J>0RmI-A4yhr_C&pWbB(UR1C-^3 z*v|nqXClc?;!b=dXr$8^{#4 zLJ5#+H-fK7box`;3%cZIh@r%{5|ZQzu-O2)xOrt3LV=W6FnT!*yY#}HR8&_F?UIBB z+!1@PVWjbO`SSAaTX-tirQXWt8mle^UhBq}4G_>Xe2^XS0WQ0xmzp?#f zF1tIh`s+ftO}NF2UzOrlT;NN~h>>SP+qUG<;A=D~dGw1HUoy0C?z>A(zd-bblpzYh zGOBd*4v-6Yl632V%B#C?+sSdS`(s=p|v<+8=oCqGs{2_R!#qCXb!u9Sa)L^a19aB43;&DmDs(6R` z&rDKpA1=&*vd(^ELD0m)!h!}G#;^zIIwK9_gG)YAjUhA{VILw)_=a^=tob9$MLjOo zM^;1(_Wv~O&t<4y3fc%*egXyHLm@sI8aqZgmalcx$TpmT5!R63nWvn5cD{rPXAfDv z7-B@}X1GhIT?Ws3NJv>YYnbUpe|E-@tBF}f665Tmbo4?&R9wstNDL}nbbDi%k!7^H z;lWgyAa$(1>=zj+G5sKp4wVT1U^Nqd+BO|nGpf*Z?1wMQa*oo@m{Rx$di1r|;Y+hUoVuVrT{iBG*UOVID|6)3oVMwr{>?!x7x-ap z2r1u|xN_;fmSksiH3nc+)fpoCFF4v*t@pV{b7r{o>PgK^Ss!CVRsf=yx5Vql2(4Vc z{c6>Kh$C2OLL49G8kA4<44U-ZqS&P*qDps8boA(6biE%2^XXtZtx8@| zI>;ELP8u5HpPgd`OA@Z+>!J?yODB~gY=EQlpoRAx*jXe_g3k_5^3j!&K#TlI{0AG_ zDpf&tW)>rw&y1Sp7^M%H#db+T0kp>b)lA#dm_>9#5mxt9o6m+#)1nN7@z}?eUg7kd z;z(?bhc|g!6ZC1dufL2&)p_;B5)}Mf&~vrak_MX|-_#B8LuF|C!|Umt<+x%v8Jqt& zzAi2DjtG6`KfrbR31w|*>xvO)ATzyI;mO2yMIUk5(=2i3zDmv(ij6EaP!|{5(CIXY zXE&SH{Aq-{V&nWIwqovHl@VK% z_BJ)b{{ALJnG^gSpenPo@0)Def86luh8=Gfe6!MVvhvKNAYn~RUzz~ngEMb^)6QQx zU$l_Xl$7Jo<(44cPuUtN-A6U(bi(AjdtUyN6{ z z9%<{;CmH1J_M~Upbhl^9kEKdycu4-W6r+TD6mI&#W;OzCpc19O@q-Sj2*<6|37-4! 
zo_%qpAMIGvd7hYqZhB%uT&y9BTM%ogV7BwW`m&nkx(PxV9JhO~AL+(qZAcgX`J2{2 zT27&cy8m;VsJIMqNrh(B1uy9|EP^krrw$}|lGmlJd_i?+C(iF-(-ihZq@Q_u+YQLB zJ-jRKqD6-~yx`6M-YCuIJATs1n{E~twRA=AhQho|Mcbb~TNyVNk7;Sv7q1BLk!*<*!IThD%FshJ=$U%2g zR`r;4%JdSnlD@;n|6OXT$827i zCV|k=F%6k5Z40O#tw0QfdQTU2`d*#%$AN!`bYUV_kTMjq@XM!f{inEc>dYC>I5%!E z9UeUzP4uHojn#yiGpNCTs`Ff)|17tySfTRGb1x|0kTs!*@5O>BsI^zGSO@C+%GJug z9z|S1;6w(0lsZUQXalGOq4un^EC!J@E^`*~h$A8Tr+(g-fHn!eFn^Gxc0QOPmpw49 zom@ULwk}VD%_&>D>*Misc~1hx^_B*??Ddm*)B+bD)okUvlm1TFYn>IQb47f{1`OQ* z=i-4ZD@O$ytY{oOJRyiP{MQ+10$D*p3tpe99{}VEL0s#!v~|GKYZ1_q{-!>}j5%P-qKEmk3nGAEMQ1?x?^{~yad4xL^#$Sp$YAOd`i>|MZy z9HOWo^$Fd(H%(`~!f)O4X!zrj@gJuB(9qU{6A+_#Zu~%;yJ>Qwi=@z7GA+YXmTL=_ z_o#YsavIk;J=A$?P}tq@pVi(Uh0)`O)n3DsJunPbul3x?$P*{XtWW~5E!knBQD*Ij zZa9nN3uu>C)bv2D+1%FV%mVb(TP#T-Awr__))pAuVQto#{T;Ao?$(t(%NCJcD$=Mx zvm6giR-ER737Kq4`ZJT5En^k<&ewn(2D)p=XkU@JHbE8`6WrSI5N92KQLqC2+-2BH ze1q3%Bo0Ev1hR})ghbIi>Z3=`iHQ98b&`g+?Y({>C0mDzVQdfc&PH8d(Y;7@lrNL) zaLL;A;CJDtR+(P1iSKd@e3P#I-@A6MEi>F03;@_NWomlBcF1dtmw_JI8$6okIZD|5 zf^*zHm_*9GlMPY8kx)JR?LD8+Tj`!g>?-t}$W1)fZZ1&hJ@v4q)CTT{4a8ST$tnNh z_+6|_EvSbHx?EghyTa!T*xM8F5MSH)#5h+yic|UyW zJ~$iiLttbVe3ROQJLUH0^mAWHFI0C;y}f1RLI;EfE(mTwBUadsD02^3U5!y}3&?(K zNkcb7LHsptPG|2;ckWoEbT!kB=WAO(F$0B2UaHVH?Y1s)`di@s?yDY9FcfZ7iLGv% zQkug)c%8J6Te78vNSUkVQS({E)92FnlJ38zDj!2RxrhBHmH7lwvM8S2a^inm0d^}IaF%al$XZlyh0rTQQ~&;BfM)hTIGGtrifhy4hb zY`4mw3&&8tM{$`4+ZSo;xd35HUGGR!Iy>Syc%qwtn!qPRg~jZ@1oKrKHsX-G38*q?F+E(9jC?e1gJL{4X#A z!HX66H=C3bI=n}HEpKXu``mG?u;os@<5btu{fboa5k zpuIhJ5yHy}Uk(a`QuX1R^B7Lk>YaR;Tcm$4{;*O%xfg+SN>HNiiz5e*l5UE2@42&J z%SP6n)dC46Hb7C@c@;v;p(Js!H{$5v21K3s^gL&gSK;TXf86^Q^E5sn6#7GX+8VUk4e@60J% zWj0-)-j;RGR=kodt46%?!dxv3NY#PnmpU_0>LfU*B0_2~im&?p>Q=JLL!IF1T3fL) z0dbV1g5B%I@i5}?tIIlv_w!C^s|(lW<#DC~_9J!UmZM9RE~i^;z(@w*$iWAP8B$ww zffsft+%mOl(tOUTdw5@$?W}*oU;<=&o(ufPF}pzijo_nI?IX1HnH96)iX5{rsU`i z6d=9+zb^H^XOYbn7$EN01(H&<%|_jL-WqRTRTO}Zw3H8jV>%?$u?b|T4s+G!+tOK2wd8bB11?>0*9}B&$V~k-gnF^-)ZoLIlA@Hesn>B z;M0dw(2G{SLgNjnBh?(K7q}~7F+-TE0wZ0}Ac1JD8-)zQB zezTENM6W-z(YvxoH$~)VQvD8wKC-xI9w}(iLGc@)e0DrJquQ_6$g{ROZ z>86LuE`>#H=*QgXL`LsFOpMDn8yLpQM&^buo;znxfs;@ZExW3Ha*>}_?OYJsGxp7| zdhgzWM{GktDm%NgRE1Ab_=HVNjXY0wVuWZxa92PPbWj6Dv_Biw@_&stneO`?kWlO4>P!)@9>1PF_a&E9>9 z+tcHi%PD@ZR^+&tTr;bU#JFKa)*r#IbDHhjO?lK(6+ahwXP_xWiIe^I zEF#e2tIVL!tC;(K3Aj{S%%AXN(OE%B;xtA6#F`bA?BXsx?!wj@u;CXq+sJdW;;s?zrkHiL036y*?vkq zoC_N>#ppCoj?+O!rtgpJGcX6XVqOQjXBj`ao+-wncQ0T0*co4Ez9pvM30xwTsECxm>t!8EAk9Xe7ZS$yilRO$(&&ff?Uo- z){XRIu$*F`yI142A(h}XUVju?UstWId7$^f0klofqrFC{l$GlRa$i+H&VGb`yUgk2 z5kBQSbMpG!s7lX4PfSu!jathK0KV3Q{dxgDoFR$G-da`;zEFxN&2;axFPTXcu8b>{4e?Ly2FPEZnxwB{qE)C^{vfJKwmup=5HYY4j%l-`h1BXQwMp?I>2Nh z)ikPnHor+ zvf{qKIU_l=sVJGn!hw=X8XXsR)!khJykiI@1h5Udz$dw(t7|h*!V8l}K?dOKfbz@E z!Lf4so%Q!?BC&eOsjz;-It^#S^M*0mJ>36@K+YdOcy-{zn^ zR@SJt*0lFqmaeslj{0tnnA{M4op-SGovN)Clqs^{-X#=jF=EY1q|2vn8Ts4VJ}j z9FImi=EZd-d^h>4vYWUJ?ZwYqw^8|Ko;G&ksMUR)AoA(*Nu2d(v3GG|ClZqkl z-xQE>%TuvRA%HIl5F$VdKJ2!Fh<*b$k0?lW*8%SC=l65~N(KTb|AND~@`LW2saUSF zOiV4qZX+iY{;sx7Ag+x(0Wv_OhVjTjxReFYrp|7vOXmE_wH|WGozecCv50VdhEB!w z8AfS(edlXJF_Rr2Bf#TsQ0-nc0h=6Xi`4qcA|eS2sLSU-mv-m?jyJYNcdB2n|d2V8xNZg^hr9psR=dYuhk;wVi*`0 z2q@aD=yab;@!`oA>;eK>RWZ2EotM%z@)cc&Kjwecv>2Is`aujHn#TZSJasx|hX=>r z?@xRB^waJKYkg}Wm zQQjBXb>XQRgnsR{BoLE^4u7z1tP&>$uZ<+JOEw$8Cnv!f{?C`|SSL&i+4pLsIMrhi zWhgBT0G62AMbMtE0N@2Ep)PPI^YPmc5UdU04JKv_cN>9w zW8Inj#GK|sU!Q^8_D>Gu;Hfu>e)}H4IE5K;$%V&v4BRbn!6aD0g0bXc;5!9}NFc3n zkVlZSHvsNO0ON)?t3G4}brlenY7iA`EA&eGi_9qy$};Q_5${TG%GMAVSb>Lm4)$Z~ z@Xj_c8Y!WhfFzi}VzmrC7e_!&IZn4R83#`@yw^Vu+d3Jf_W)$6A%`Yb1V#YFz#e$3 
zSHSd#MNJjFL~yu>am82=Gb|ErEC3g+C{S8vuHD~0k8s$5t2Le)B{vR@5p&=uLjI80 zn?gsabWB-OGh7(tgr*6AE^%DE_&xTbMJRRZvOE^%%45dCW4$1SSfUz1x$|KS9pN*^ zR~Lc11b!h7qsjuzxnB{(^0%Cu=MmaJo~lI%V1kFZ&w{$eJ36v!F%Dk^$~L7aqply< zMoauBf~w;>{XgC1pmv+yzrko>X-RSo&Uj!`9rpwA@OGs$FXC?z6YJ?6+l){QApeH< z@ye`KG%IskxCeK(0=f#nz^N7tu~xXd6Nq#IEeq#A1FsN%6@1_o$hvJwy(dW}%S7$& zt`P29SpjEuUhgcxW>r*zqTapu!UMrYkFnFr3LWtf7$8{iPY2)B=H{{?ESTSJ%w1j{di-bKE+D4AQfAAuA&3AW66*)Kq z5P$~7mM6d#!=od&g#p&mx{LLZcWU!og5w1U^f2G3N<@2<9d5iOw3fh#ybL`NggFk! zI2cU6j4vq>0b_O>;J6@6`m~^KNgCpaZzgJ)MXVLT zHzXH_<};a>HZuaC}>#wfN{#rkIAUhz> zY|51|dO?jFyAYbt%hhjJrJ>_@xlR}TFQM*us`eSMJN{XX`1N0T@oXy7Y{<=m(@h#+ z`PTsiFzdues9Jb#e@Kf;U8#Xcz#*3y*z)jLG$5~o3kG*SqCjb-;)BK4%J6PJI6wvb zX+IbRO*R+D5ZWoMaSlKefi8>=XltOr*21fgA8hy5T7w53Y+D&{y7l^|M zysV&Z`8<%IMUdwMVP8VzH&Pfrka4h{379MyW$eSFdO*=U+DQ|2LbCJDUIi literal 0 HcmV?d00001 diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/models/BUILD b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/models/BUILD new file mode 100644 index 000000000..c218548d2 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/models/BUILD @@ -0,0 +1,24 @@ +load("//tensorflow/lite/micro:build_def.bzl", "generate_cc_arrays") + +package( + default_visibility = ["//visibility:public"], +) + +exports_files( + srcs = [ + "hello_world_float.tflite", + ], + visibility = ["//tensorflow/lite/micro/examples/hello_world:__subpackages__"], +) + +generate_cc_arrays( + name = "generated_hello_world_float_model_cc", + src = "hello_world_float.tflite", + out = "hello_world_float_model_data.cc", +) + +generate_cc_arrays( + name = "generated_hello_world_float_model_hdr", + src = "hello_world_float.tflite", + out = "hello_world_float_model_data.h", +) diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/models/hello_world_float.tflite b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/models/hello_world_float.tflite new file mode 100644 index 0000000000000000000000000000000000000000..f741b3a7b6b63a476832b4f95c3199b7b8b7e17e GIT binary patch literal 3164 zcmaJ@2~?9;7XAen6pE-38D%Ug~4T~F81OYv+g%wwOI1cxM(L6KHzHaE1?jP48RQIF>rpCITI0Kr%ttM7U27dI*9O!HyJcY1oC}#B!mRWfiaK zMI&C%>wE~k35lA7@hxa%~9qJ+o$Zitq6p5KZp-jrlU*z5om~0Df6IPGMo_| ziBn68@VcE3%pUsyY`28M+@LGiG}r`&+TD0<=YE)zD&pGsY)C(tiH^G#;>(rZ*xhLv ze%0b6?Vh|@%usB>?~Ko+5WNX9Vr`{W^*8a$x)O-}F;$$H{4ddv*;3qnl?+a}4-s7| zdy3~r7UGQ)cIdjW6b;!Q`sP>P6FvJLPz<&CS)B6pP;u(9Y7knw6dx|j1jB+IQq+B4 zoEURnvfG|4)l?@y?$az-Qtd4bmkogYb7iRfS^X;yS7FWH)dKekx9Q90s|kMq#p6 z1{1eF7u^@zkDo} zt2Uya`#nYJ<1`o{SHk=KYcM+RaB+*EQTEs_!T7CdkTJH1!?__aQNkZIIdWM!jGSz{e=|lH|+yu z;+iVBwRooDz=*>bAOEv-@oPtA*aCw>H@+uq)_a&5#+($*|40LeuEkPDZIN_(&``|S zl@5;v`oYSWDr^aPB+6(Xewwlldw-t-n=730qnB&pyeSLS+BJ~ndIJ>;i=@9TiG~X8 zR=6^3E%v_s70Uho3}?#T6aP6l9D}x%pfF}{@x;AH!G8Z<2p&Ej?Ka9W(xer`y!T<< zs4F;&?peN*_+F8(XqJlQWjLv6Dum0tl)&4 zd9SUhG2?qQj&M_kA2gvn?+EUHjf?0k-_HM(!1FpE)1H8{qF2^DrB%-Do_#x%&XqIK zDhon?qqYCAjf;IVHV-46-`}x*ZHgBQ1?7RWRS;g8YT*eTVPyak;(n#LOuu>$yd8qkGVuz_l!7T2Q!KV7!j%gzjepXYzmT-%}e#WV^I%+I6)IcrQNtWkOEPTJy$0gQMTX&OPuCrVxx#n^W|UdQ&%l8l-zyoPmo4cAsKVJu@mo#~gWynUOWE*809$1T_G)*SY**r9=ii#eE! zKlasH|JkorUVa4#!UmdfCFGNXvk6Xw3#5=tXr%dQ0fzj*NH+k7duG3at&eb#Gokne< zW?7Pko&c&?kJjghr$>lNACeU7+sRD0CN`-vd4N`x=&x22UAr%AV|ypQ@P88BrpGa| zo`LixQ`^zv+dT2;)DMq#FRb}s?(|o!`QSc-|HqMpXJnJ}+Ra@qtaq;z&X3Z?@PAw>EJ86a=IE1gKr k;U5Vd-s<#b=Qp|K-TlT{V=lJuV7W`ELo9r6@p tf.keras.Model: + model = tf.keras.Sequential() + + # First layer takes a scalar input and feeds it through 16 "neurons". 
The + # neurons decide whether to activate based on the 'relu' activation function. + model.add(tf.keras.layers.Dense(16, activation='relu', input_shape=(1, ))) + + # The new second and third layer will help the network learn more complex + # representations + model.add(tf.keras.layers.Dense(16, activation='relu')) + + # Final layer is a single neuron, since we want to output a single value + model.add(tf.keras.layers.Dense(1)) + + # Compile the model using the standard 'adam' optimizer and the mean squared + # error or 'mse' loss function for regression. + model.compile(optimizer='adam', loss='mse', metrics=['mae']) + + return model + + +def convert_tflite_model(model): + """Convert the save TF model to tflite model, then save it as .tflite flatbuffer format + Args: + model (tf.keras.Model): the trained hello_world Model + Returns: + The converted model in serialized format. + """ + converter = tf.lite.TFLiteConverter.from_keras_model(model) + tflite_model = converter.convert() + return tflite_model + + +def save_tflite_model(tflite_model, save_dir, model_name): + """save the converted tflite model + Args: + tflite_model (binary): the converted model in serialized format. + save_dir (str): the save directory + model_name (str): model name to be saved + """ + if not os.path.exists(save_dir): + os.makedirs(save_dir) + save_path = os.path.join(save_dir, model_name) + with open(save_path, "wb") as f: + f.write(tflite_model) + logging.info("Tflite model saved to %s", save_dir) + + +def train_model(epochs, x_values, y_values): + """Train keras hello_world model + Args: epochs (int) : number of epochs to train the model + x_train (numpy.array): list of the training data + y_train (numpy.array): list of the corresponding array + Returns: + tf.keras.Model: A trained keras hello_world model + """ + model = create_model() + model.fit(x_values, + y_values, + epochs=epochs, + validation_split=0.2, + batch_size=64, + verbose=2) + + if FLAGS.save_tf_model: + model.save(FLAGS.save_dir, save_format="tf") + logging.info("TF model saved to %s", FLAGS.save_dir) + + return model + + +def main(_): + x_values, y_values = get_data() + trained_model = train_model(FLAGS.epochs, x_values, y_values) + + # Convert and save the model to .tflite + tflite_model = convert_tflite_model(trained_model) + save_tflite_model(tflite_model, + FLAGS.save_dir, + model_name="hello_world_float.tflite") + + +if __name__ == "__main__": + app.run(main) \ No newline at end of file diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/train/train_hello_world_model.ipynb b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/train/train_hello_world_model.ipynb index 0aadd0bf0..3196383b8 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/train/train_hello_world_model.ipynb +++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/train/train_hello_world_model.ipynb @@ -3003,8 +3003,6 @@ "converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", "# Enforce integer only quantization\n", "converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]\n", - "converter.inference_input_type = tf.int8\n", - "converter.inference_output_type = tf.int8\n", "# Provide a representative dataset to ensure we quantize correctly.\n", "converter.representative_dataset = representative_dataset\n", "model_tflite = converter.convert()\n", diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/train/train_hello_world_model.py 
b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/train/train_hello_world_model.py
index 3388e1e3d..90d26d424 100755
--- a/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/train/train_hello_world_model.py
+++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/hello_world/train/train_hello_world_model.py
@@ -468,8 +468,6 @@ def representative_dataset():
   converter.optimizations = [tf.lite.Optimize.DEFAULT]
   # Enforce integer only quantization
   converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
-  converter.inference_input_type = tf.int8
-  converter.inference_output_type = tf.int8
   # Provide a representative dataset to ensure we quantize correctly.
   converter.representative_dataset = representative_dataset
   model_quant_tflite = converter.convert()
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/micro_speech/Makefile.inc b/third_party/tflite-micro/tensorflow/lite/micro/examples/micro_speech/Makefile.inc
index 573a4e57c..46d567669 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/examples/micro_speech/Makefile.inc
+++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/micro_speech/Makefile.inc
@@ -233,9 +233,13 @@ $(MICRO_FEATURES_GENERATOR_TEST_SRCS),$(MICRO_FEATURES_GENERATOR_TEST_HDRS),$(MI
 $(eval $(call microlite_test,micro_speech_test,\
 $(MICRO_SPEECH_TEST_SRCS),$(MICRO_SPEECH_TEST_HDRS),$(MICRO_SPEECH_GENERATOR_INPUTS)))
 
-# Test the code for feature generation.
-$(eval $(call microlite_test,simple_features_generator_test,\
-$(SIMPLE_FEATURES_GENERATOR_TEST_SRCS),$(SIMPLE_FEATURES_GENERATOR_TEST_HDRS),$(MICRO_SPEECH_GENERATOR_INPUTS)))
+# TODO(b/268568089): This test takes a very long time to finish, which causes
+# the CI to run for a long time.
+ifneq ($(TARGET_ARCH), hifimini)
+  # Test the code for feature generation.
+  $(eval $(call microlite_test,simple_features_generator_test,\
+  $(SIMPLE_FEATURES_GENERATOR_TEST_SRCS),$(SIMPLE_FEATURES_GENERATOR_TEST_HDRS),$(MICRO_SPEECH_GENERATOR_INPUTS)))
+endif
 
 # Tests the audio provider module.
$(eval $(call microlite_test,audio_provider_test,\ diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/BUILD b/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/BUILD index 069fde48a..9e1a4e60f 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/BUILD +++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/BUILD @@ -30,7 +30,7 @@ py_test( srcs = ["evaluate_test.py"], data = [ "trained_lstm.tflite", - "trained_lstm_quant.tflite", + "trained_lstm_int8.tflite", ":sample_images", ], main = "evaluate_test.py", @@ -43,5 +43,6 @@ py_test( deps = [ ":evaluate", ":train", + "//tensorflow/lite/micro/tools:requantize_flatbuffer", ], ) diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/evaluate.py b/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/evaluate.py index e64abe200..f2fdbf3ed 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/evaluate.py +++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/evaluate.py @@ -36,7 +36,6 @@ "the trained model path.") flags.DEFINE_string("img_path", "/tmp/samples/sample0.jpg", "path for the image to be predicted.") -flags.DEFINE_bool("quantized", False, "if the model is quantized") def read_img(img_path): @@ -62,39 +61,94 @@ def read_img(img_path): return data -def predict_image(interpreter, img_path, quantized=False): +def quantize_input_data(data, input_details): + """quantize the input data using scale and zero point + + Args: + data (np.array in float): input data for the interpreter + input_details : output of get_input_details from the tflm interpreter. + """ + # Get input quantization parameters + data_type = input_details["dtype"] + input_quantization_parameters = input_details["quantization_parameters"] + input_scale, input_zero_point = input_quantization_parameters["scales"][ + 0], input_quantization_parameters["zero_points"][0] + # quantize the input data + data = data / input_scale + input_zero_point + return data.astype(data_type) + + +def dequantize_output_data(data, output_details): + """Dequantize the data + + Args: + data (int8 or int16): integer data that need to be dequantized + output_details : output of get_output_details from the tflm interpreter. + """ + output_quantization_parameters = output_details["quantization_parameters"] + output_scale, output_zero_point = output_quantization_parameters["scales"][ + 0], output_quantization_parameters["zero_points"][0] + # Caveat: tflm_output_quant need to be converted to float to avoid integer overflow during dequantization + # e.g., (tflm_output_quant -output_zero_point) and (tflm_output_quant + (-output_zero_point)) + # can produce different results (int8 calculation) + return output_scale * (data.astype("float") - output_zero_point) + + +def tflm_predict(tflm_interpreter, data): + """Predict using the tflm interpreter + + Args: + tflm_interpreter (Interpreter): TFLM interpreter + data (np.array): data that need to be predicted + + Returns: + prediction (np.array): predicted results from the model using TFLM interpreter + """ + tflm_interpreter.set_input(data, 0) + tflm_interpreter.invoke() + return tflm_interpreter.get_output(0) + + +def predict(interpreter, data): """Use TFLM interpreter to predict a MNIST image Args: interpreter (tflm_runtime.Interpreter): the TFLM python interpreter - img_path (str): path to the image that need to be predicted - input_scale (float): quantization scale for the input tensor. 
Defaults to - 1 (no quantization) - quantized (bool): if the model is quantized + data (np.array): data to be predicted Returns: - np.array : predicted probability for each class (digit 0-9) + np.array : predicted probability (integer version if quantized) for each class (digit 0-9) """ - data = read_img(img_path) - # Quantize the input if necessary - if quantized: - # Get input quantization parameters (0 since input data has only one channel) - input_quantization_parameters = interpreter.get_input_details( - 0)["quantization_parameters"] - input_scale, input_zero_point = input_quantization_parameters["scales"][ - 0], input_quantization_parameters["zero_points"][0] - # quantize the input data - data = data / input_scale + input_zero_point - data = data.astype("int8") + input_details = interpreter.get_input_details(0) + # Quantize the input if the model is quantized + if input_details["dtype"] != np.float32: + data = quantize_input_data(data, input_details) interpreter.set_input(data, 0) interpreter.invoke() tflm_output = interpreter.get_output(0) + # LSTM is stateful, reset the state after the usage since each image is independent interpreter.reset() - # One image per time (i.e., remove the batch dimention) - # Note: quantized output (dtpe int8) is converted to float to avoid integer overflow during dequantization - return tflm_output[0].astype("float") + output_details = interpreter.get_output_details(0) + if output_details["dtype"] == np.float32: + return tflm_output[0].astype("float") + # Dequantize the output for quantized model + return dequantize_output_data(tflm_output[0], output_details) + + +def predict_image(interpreter, image_path): + """Use TFLM interpreter to predict a MNIST image + + Args: + interpreter (tflm_runtime.Interpreter): the TFLM python interpreter + image_path (str): path for the image that need to be tested + + Returns: + np.array : predicted probability (integer version if quantized) for each class (digit 0-9) + """ + data = read_img(image_path) + return predict(interpreter, data) def main(_): @@ -105,8 +159,7 @@ def main(_): raise ValueError("Image file does not exist. 
Please check the image path.") tflm_interpreter = tflm_runtime.Interpreter.from_file(FLAGS.model_path) - category_probabilities = predict_image(tflm_interpreter, FLAGS.img_path, - FLAGS.quantized) + category_probabilities = predict_image(tflm_interpreter, FLAGS.img_path) predicted_category = np.argmax(category_probabilities) logging.info("Model predicts the image as %i with probability %.2f", predicted_category, category_probabilities[predicted_category]) diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/evaluate_test.py b/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/evaluate_test.py index 406e7a868..1092a7852 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/evaluate_test.py +++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/evaluate_test.py @@ -22,18 +22,19 @@ from tensorflow.python.platform import test from tflite_micro.tensorflow.lite.micro.python.interpreter.src import tflm_runtime from tflite_micro.tensorflow.lite.micro.examples.mnist_lstm import evaluate +from tflite_micro.tensorflow.lite.micro.tools import requantize_flatbuffer PREFIX_PATH = resource_loader.get_path_to_datafile("") class LSTMFloatModelTest(test_util.TensorFlowTestCase): - model_path = os.path.join(PREFIX_PATH, "trained_lstm.tflite") - input_shape = (1, 28, 28) - output_shape = (1, 10) - - tflm_interpreter = tflm_runtime.Interpreter.from_file(model_path) - np.random.seed(42) #Seed the random number generator + def setUp(self): + self.model_path = os.path.join(PREFIX_PATH, "trained_lstm.tflite") + self.input_shape = (1, 28, 28) + self.output_shape = (1, 10) + self.tflm_interpreter = tflm_runtime.Interpreter.from_file(self.model_path) + np.random.seed(42) #Seed the random number generator def testInputErrHandling(self): wrong_size_image_path = os.path.join(PREFIX_PATH, "samples/resized9.png") @@ -67,9 +68,7 @@ def testCompareWithTFLite(self): tflite_output_details["index"]) # Run inference on TFLM - self.tflm_interpreter.set_input(data_x, 0) - self.tflm_interpreter.invoke() - tflm_output = self.tflm_interpreter.get_output(0) + tflm_output = evaluate.tflm_predict(self.tflm_interpreter, data_x) # Check that TFLM has correct output self.assertDTypeEqual(tflm_output, np.float32) @@ -89,31 +88,28 @@ def testModelAccuracy(self): self.assertEqual(predicted_category, label) -class LSTMQuantModelTest(test_util.TensorFlowTestCase): - - quant_model_path = os.path.join(PREFIX_PATH, "trained_lstm_quant.tflite") - input_shape = (1, 28, 28) - output_shape = (1, 10) +class LSTMInt8ModelTest(test_util.TensorFlowTestCase): - tflm_interpreter_quant = tflm_runtime.Interpreter.from_file(quant_model_path) - np.random.seed(42) #Seed the random number generator + def setUp(self): + self.int8_model_path = os.path.join(PREFIX_PATH, + "trained_lstm_int8.tflite") + self.input_shape = (1, 28, 28) + self.output_shape = (1, 10) + self.tflm_interpreter_quant = tflm_runtime.Interpreter.from_file( + self.int8_model_path) + np.random.seed(42) #Seed the random number generator def testQuantOutputs(self): - # Get input/output quantization parameters - input_quantization_parameters = self.tflm_interpreter_quant.get_input_details( - 0)["quantization_parameters"] - output_quantization_parameters = self.tflm_interpreter_quant.get_output_details( - 0)["quantization_parameters"] - input_scale, input_zero_point = input_quantization_parameters["scales"][ - 0], input_quantization_parameters["zero_points"][0] - output_scale, output_zero_point = 
output_quantization_parameters["scales"][ - 0], output_quantization_parameters["zero_points"][0] + # Get input/output information of the quantized model + input_details = self.tflm_interpreter_quant.get_input_details(0) + output_details = self.tflm_interpreter_quant.get_output_details(0) + # Create a float model for results comparison float_model_path = os.path.join(PREFIX_PATH, "trained_lstm.tflite") tflm_interpreter_float = tflm_runtime.Interpreter.from_file( float_model_path) - num_test = 100 + num_test = 10 for _ in range(num_test): # Clear the internal states of the TfLite and TFLM interpreters so that we can call invoke multiple times (LSTM is stateful). self.tflm_interpreter_quant.reset() @@ -123,28 +119,21 @@ def testQuantOutputs(self): data_x = data_x.astype("float32") # Run float inference on TFLM - tflm_interpreter_float.set_input(data_x, 0) - tflm_interpreter_float.invoke() - tflm_output_float = tflm_interpreter_float.get_output(0) + tflm_output_float = evaluate.tflm_predict(tflm_interpreter_float, data_x) # Quantized the input data into int8 - data_x_quant = data_x / input_scale + input_zero_point - data_x_quant = data_x_quant.astype("int8") + data_x_quant = evaluate.quantize_input_data(data_x, input_details) # Run integer inference on the quantilzed TFLM model - self.tflm_interpreter_quant.set_input(data_x_quant, 0) - self.tflm_interpreter_quant.invoke() - tflm_output_quant = self.tflm_interpreter_quant.get_output(0) + tflm_output_quant = evaluate.tflm_predict(self.tflm_interpreter_quant, + data_x_quant) # Check shape and type self.assertDTypeEqual(tflm_output_quant, np.int8) self.assertEqual(tflm_output_quant.shape, self.output_shape) # Convert the integer output back to float for comparison - # Caveat: tflm_output_quant need to be converted to float to avoid integer overflow during dequantization - # e.g., (tflm_output_quant -output_zero_point) and (tflm_output_quant + (-output_zero_point)) - # can produce different results (int8 calculation) - tflm_output_quant_float = output_scale * ( - tflm_output_quant.astype("float") - output_zero_point) + tflm_output_quant_float = evaluate.dequantize_output_data( + tflm_output_quant, output_details) # Make sure the difference is within the error margin self.assertAllLess(abs(tflm_output_float - tflm_output_quant_float), 1e-2) @@ -155,7 +144,75 @@ def testQuantModelAccuracy(self): # Run integer inference (quantized) on the sample image # Note that the TFLM state is reset inside the predict_image function. 
       category_probabilities_quant = evaluate.predict_image(
-          self.tflm_interpreter_quant, image_path, quantized=True)
+          self.tflm_interpreter_quant, image_path)
+      # Check the prediction result
+      predicted_category = np.argmax(category_probabilities_quant)
+      # Check the prediction
+      self.assertEqual(predicted_category, label)
+
+
+class LSTMInt16ModelTest(test_util.TensorFlowTestCase):
+
+  def setUp(self):
+    # Convert the int8 model to int16
+    self.int8_model_path = os.path.join(PREFIX_PATH,
+                                        "trained_lstm_int8.tflite")
+    self.requantizer = requantize_flatbuffer.Requantizer.from_file(
+        self.int8_model_path)
+    self.requantizer.requantize_8to16()
+    self.int16_model = self.requantizer.model_bytearray()
+    self.input_shape = (1, 28, 28)
+    self.output_shape = (1, 10)
+    self.tflm_interpreter_quant = tflm_runtime.Interpreter.from_bytes(
+        self.int16_model)
+    np.random.seed(42) #Seed the random number generator
+
+  def testQuantOutputs(self):
+    # Get input/output information
+    input_details = self.tflm_interpreter_quant.get_input_details(0)
+    output_details = self.tflm_interpreter_quant.get_output_details(0)
+
+    # Create a float model for results comparison
+    float_model_path = os.path.join(PREFIX_PATH, "trained_lstm.tflite")
+    tflm_interpreter_float = tflm_runtime.Interpreter.from_file(
+        float_model_path)
+
+    num_test = 10
+    for _ in range(num_test):
+      # Clear the internal states of the TfLite and TFLM interpreters so that we can call invoke multiple times (LSTM is stateful).
+      self.tflm_interpreter_quant.reset()
+      tflm_interpreter_float.reset()
+
+      data_x = np.random.random(self.input_shape)
+      data_x = data_x.astype("float32")
+
+      # Run float inference on TFLM
+      tflm_output_float = evaluate.tflm_predict(tflm_interpreter_float, data_x)
+
+      # Quantize the input data
+      data_x_quant = evaluate.quantize_input_data(data_x, input_details)
+
+      # Run integer inference on the quantized TFLM model
+      tflm_output_quant = evaluate.tflm_predict(self.tflm_interpreter_quant,
+                                                data_x_quant)
+      # Check shape and type
+      self.assertDTypeEqual(tflm_output_quant, np.int16)
+      self.assertEqual(tflm_output_quant.shape, self.output_shape)
+
+      # Convert the integer output back to float for comparison
+      tflm_output_quant_float = evaluate.dequantize_output_data(
+          tflm_output_quant, output_details)
+      # Make sure the difference is within the error margin
+      self.assertAllLess(abs(tflm_output_float - tflm_output_quant_float),
+                         1e-3)
+
+  def testQuantModelAccuracy(self):
+    for label in range(10):
+      image_path = os.path.join(PREFIX_PATH, f"samples/sample{label}.png")
+      # Run integer inference (quantized) on the sample image
+      # Note that the TFLM state is reset inside the predict_image function.
+ category_probabilities_quant = evaluate.predict_image( + self.tflm_interpreter_quant, image_path) # Check the prediction result predicted_category = np.argmax(category_probabilities_quant) # Check the prediction diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/trained_lstm_int8.tflite b/third_party/tflite-micro/tensorflow/lite/micro/examples/mnist_lstm/trained_lstm_int8.tflite new file mode 100644 index 0000000000000000000000000000000000000000..636ea0bbec46d4f00a386332a38c82257f81805c GIT binary patch literal 13952 zcma)j2Y4LUb?(eg-<_T99oqoxq7WS<*sCatk|k4Cb1c7vrbQ@{LlsMuU-I&Ov7INm0diF21w3|L5Fu@11++ z%s~Kxp!*KoISL}k3I(7x$O&R2M1wpDp&$)`G6?#gfTDml_CU~nK=Dlwgn&E@2nD3M zswiq7we1c-20%}bKlO!2AOGXSN5>yN^31X0H`;zpQ4}|*SNmz#+I{|+@yCxpdgR!9 zcrO7wwO$A4;Ya^;{OI9FAAjaGC(+7$^e@A%Q~T0Ou!wz60pr0eub7R{??H zt;zk*GwWmf1CXZykq~o3MgR{1)*#?(WjIKwV^#H>Qxy(0rp}H!H>&PbeW;|iQQM9T z3=a(qK@fY5hl)2mFuYMMyB@xEUQxaS=m&s)0;qjzKcfb7mzk2P*zj^r=?_^P#7-)^VzkqFb`^)R7fTuwKG1n! ztJ!d9U_Y`WDCU&!m54!Sc9L}7nVd*z!}WYp9t~%Vu+%d2fAW6XA?EqS618b(&!GoD zK74y$=RNmgRvl*V-B52?EJ`P*WD-4X1+nYk?p=F!|KUz=TQ+xf#14l?;5whF&PD49 zl-hUb!B5<(t*q8^{QX}X-p>s8F2|SHS~Z@-Hh=EZPwX2RE_^Hi$3AiQU3cs=>5?g~ zMs7gj7*<`)+l+0cvD;JaRYDRBoA2KCKr^jHd#tS-;j&v~ElFvuo;{=9Oqp+MeakyLzHtiynXWV|H%@NIAx}q#B{N}muDe0clK4P{OY2YjA zH!qC*$)P>{Ax}Qvgfgdvfj@HJ$xSC>=igYUT{`#nZ|ohTE_oh`%HnzcHd`wKLwBOH zGu12|I&%N+FzqP$2`_C%I@^$6&abaWb?s!jYv26QTmbEB+qdz+h^16aW~#9;wM&>P zAMmuldNNlQ8J=Z?VT(Supm(lt8$9h>$Ex?>3-%x-YI60ZLNgWa>=*q~JceflXmHl# zMauUc(`wv9q2<{#D4}m%DRZ*iy7K=LkC?OzCIdQD+-Y7!abh4K_!~`y!gj&Gvias5 zSCck_e?u^AN}y-t5{<{3kmWp7a&g^(d>7gIgvOfHvZ;95u!TEyksH!z2Why|-_I}$ zulsZ}WIfMo`B9UPJ@fPbI6?Q_`6*j#gm;ern2UU6_j$^?z}f zZn~k&AyY)c>9sn#^;=5Mz8RS?Zo7SSufe?~yV&Qny6OT9~|gmQe>e_^5*mNXAIS%W_H2C;d>hQC5qrsP@jeB+Ce-PDrB~rckKPd;X}g# zgSkoybc<;5$?Pdnw`=;{$=Q|U)tR%uiaC%{s~+dbqQ@I<(-wM6keh-hdR93YMhJyCl_OtW`%Rh@!0du*?n3kA~iXv&S6BY!t=KuGU!X3 zSav|sR=nPYQJN};;~ej_4fMEb>3B4@yj-vN>f(?@n_(YqKoOrt)SGP{+Gp^RW7wXK zqIvVD?(Nw*(56jHHAoMEHAJ^IVE)XZP`@3z7=glc6OT`(78acyu4+kw1vy=5apYXl zpxcZfQd;2nmT70GW+{h82fJxPFJ+V%-ceJkW<%55Xf~alHfS zxB?3}l#wo66qu?rxH0SwFqAP65DEs{;MSeJJNqtPy2RoY7;bQ-BxG^rI=AZs4zJNF zQ?S*z^3N~TF|4W6z>d0mX`zqKi>ok7^C#OsTXyot-%y?6bL@M!kh$dQQLe{gslzW!6&$ ztpd4Gt3l6*6k}lYMoO+IrryYVYp2yE&&Ge6)cbvXPHMcZ=n z<;i%NW*ij-)sKYCWWZzS#1`OoFXVExGsZk$5az|jtN9$C&I`G`CZ%lMZqZhAP{=`V z@#l;DV9(f2{{h9IbcSvTF1srFIRd*nqu08_+aB1xk%5h_Sk5XcuU9!^JI{qa;ikpa zcOzFJ+TYQ0%l5FYJsb*JxHt=U@R&g_4sYuTpoz_&-qr@Eny6K~GC_}Zkfhb`5Yx4( zc+unO>9I+#otfmS9(V7UoYfM{z`&@r$+Oyz-?GsWLMPGi&W%CrVD|{*fnyimIeq?e zD+PM(-YIiBqlYLEky?1e#xj~}_QQC)#{+3ty~D3b=AaHcM>izeh`BmDdi6>n&mom~ z#k~3HgPKY)TFBNjax%nc7GoK!j*TSsh)zQd?{rL^x~$Q`N<5Vmn%aqzm81#n-gAq$ z(6SiEyxR`{$sKZvEue+rIc0_gf86jfgZwEdwb&97idiIm}pEx#g9Jf;wdZ z=Ddp14H?@vdU_l>qfxmH?daeC(BosR>{eZ8A^ zIXJMEG(ww)P##`cUpxN@E^2qq#9dk0Sx!!|40g3s@M)Lr-0r1gn|?_h4DPV~x@dBT z4t;a4t|WFP&KM!iF+CgJHBXz++Kc7}j+L&)A_-Tki}(CmXk5;;R+k+Uw+NhQ40%e? 
zb9WkH%`2Nd9ih?fF}9{Ol4Hi!RfBKJ;$MCh{U}^SWb0`r!r^iB{{ibkG>)<)t9n*G~VHfq$J0xbJ#!K z@rl~$MV#BCA zZ1u(D+V185)lr(>Id`reztb*EzKWkE1OM4+$^2Rn((+y{-NWGTVA=iNWxeBz=So!l;*ZEp30Kdq z#n{45bEIMNSvQE*m)pC)ZVQYKEyzo|inOwF$oPxOrosKv%%43moN{*SS89ttJJ@VG zq#rKWwKb>F+EX_YdDj-!($5qVZ4}G=`eTAX>3ZbIppELNhqFdjzDi_XcSW%(#@|&4 zJex!=aJNr??4hP&Dtjh+o8P{5k~a*NZ+$MacjNL(KofmkUcd%7TuNmrEhgQ6w?24K zcx_|dlA#gDMP<3om5kq=;IYYXyjYlLzIVHmx%(ft56pYYB=b+xSC=B+Kk!av0*$r| z-fXBtm}Tb?{S^ySjOm-{+Jn(fr#p!5>G~n!FvU5chZ4oM8O^ZeS_D{Ll zbjBi{8(4id{@&aO{T=7_zA(t{C;U8f!1&3QvL*Qj|K?E_!7rGcC%*VDXUW zM06~E+Y4I_=Iu78>Fo15L~^`w@8#D<|7Ia&Gw6rNzBIHqKC~~pEfwc}%-!~mdGM4L z0}u1$>J?`7Oe?J!Zajat{-?MvJN6HkjFsTULno)-$SICGjE52>W7{wG?%hqj_=dip3bel4J~Pc8 zQjl6Vbs>2$Lxr6=?Mmo~egF3*%Y&@PWP1C@jbAzYAIFWUm};3uLP+Mj~?AQeEAr z)oy)r|Esz4KMd7f7J0??htJ>9dCJs@VRMMBJ^JN@FIY)zk7=Ea$e7_zQrHrNCLR@@ zDIre7d?MK+zV+DWel>YQQjGt@Qz~(Fza@9xTG%^s=2x#yPR(q+<=Ys#7>ulTP0Wvd z=GD?-Df5D4bIAu~+&0r=3vOzJPU$UNQrS9xsbA+9xV)k{LE8l6+kOwWRX?Sx1Wc{q zLqCjm3CUh1`syY9ii3Cm2ma?@^IgzwjkWjIXNYPG!k_P-H|_kI@R@IBwNgT6s~dbL zo5edy!_LOaE~bqsT9MR``}LOS)z0d`V1g76lE>kbx9{kU*Pq~ON-S=G9dLc-i=4K*#WA|{qApzbA*f9ay4jHKVg?)iHN`q;lf(7*jB z1ib}l?u!s~=C1(e*FpSEVBh#V2zv5wA*k!0Am~TH$Z_Di5JZ9Uzz-nkL0|y+%AZ1z z_$&nR{}+NHz!>uPH(~#EK~W9^1ITN@mU6fT>=78~2WV7Ol&@Do9?+kG^83Jr{>ieU zK)?pmFDMES0Xh%*e1-+K3oy>V26bEWijutv`ycm0(BE`I(7oFrs2%9X0WiM{3_Jla z?nPh}`PKjgJpuHj_#o&VV8r-2Fj&QVA!vgQf;x9X&`!|*KG6Rn(BCtl&w#f2o@M5GB(OXcumTVXiHg6-B^5H0?F&j(g z!@6m$E8~8vIXIDm>D)9lx`U%N2KbR=ulXj!{n8vHi}k!ZS*TExvraLX zbiE>1vNnqD!4zhlT9=2ELueN6jHjVGzAD_}GZG%eq+y7f z+*Z_tn@X*EOPe3IqBV;N;kgZUVp~(EKqH&BLLNiYr)XR9P`PawN3^7FXltWRX2($3 zS?VkIATEqY?Cv4O%%$#bT20ed(r&QXg;Yy)CGqs^jwWH4He))s;;5aiFjOvED_Eig zG7SwX=i6*9OI?AKYI&v=!Buv;WZY;# zJ@uNNEH+6`Q`Sc+Y!BYm*$gEUX3t#F<+fFgsi-`R5$#AGkwUCC}G zgVv!C7vFX=*(24gtX!Jm4tTW2J|AD+J3F(%STJ#pOj8=L$@;x6I6l0z5*9d0kmGQ} zVxBYaFljOW>O^O&MiEOT62&}ua2?ZA*6eJU&1wMEuu?w z89Mb&NFS1>pcGQ51>H8i)fq71cn8&qF4R_}eACCZ6BI)yc*1A2hf~}_2I2Z$ojQOt zt?>j5MEg=}q256I^n*6N!;`D)fziu2UtUb}#t!>{v(0J9rb=2SL)EHTA!+0Aq-f*W zNUl!U;#4Lri&+~c+6{*xBUlTY32lGTjSjr(!O;`bdegQkD7(Bpzud*tpZ&OhtSX%@zSGvjkqH=CZg)+k~zdE@kXS6E#_TF$arS%(6`SpSI<{9flzCuuDJ4^pm8298cn~?r zw3QG|Em^HxaixG%6e?+Yz1?`hlw?!nLi>EfY@lRA#jEk_Io-0%n_8U{vv!ga8O7fi zHciv@W`!w+N)=kvVb(0eZf>r?HNNVDLRnld*vecU!@`zJ<+=`77s7&|nA$G1PEs|d ze>FjpJFRxV!YnxlU&DSwnV6BYGnCZTK4356GhJOT2T!3M>Ap9vx(J8+p3dCT#EvaA z*VKr(+@!EN&TDsx4xOn| zVyBX~4!AI#Mao(7`j`)a8j`hOl9d81^KvWDB1?QLPigcCE0nG)Mu}#Oij*@{V+v+& zn2ogxBI7z818e4`<{aBJXG~6y20~_oS4pL*hTWX3*r^3O zrbV@hoP`$ZHpM{K8hCZZqw^N@CAMX#=BzMbPt+J5SZFsS7&bG(!@;*^99rJsikB{x1;t~a$Wf9E&URE; z&f7FbYLUbcZM1}?0y3+M6S!eA>Md7 zQ9|l6Ew0B`6bgh0kWDi@TWJp=c(aY{tarpLO3mV%6yW@z3o~i_geO7G=*6PjHj^+DV+N1D z!-iK;BtO)*N-W5X?5-KB}tn;C9Gqwzp1jMUkI z`nMzbQ^zf2HAUYf#-aCK+x+$spiF zH7Bjs9HX_-I$ejx9Ui<;P1xq=x`(*9=ndnoTVjsj=oN9HesOW{ZtrT$XP2}iEzH)Q zHs@34liS)m)93s>La@WgI7~AXQz>XSx4BAhlSXXBY-hWg3xW&fEgqY9=~B~y+@Yms zJ9Bk#Evt38in{1Tmc+N)q*uEx)%8l23UtR?xyTB^ddbw8k!eJqt>`<3lX-G-m2LD{ zCNB1*5KR*AGz}ItWD=4!?c7@n!FbKJ5EyQ6m&zeA-ekIBuh(m-xReTQ@s47|c5JTP zbS5`CB==ywbLXIL@&q_%nIiQx()N3nOd+V)LUQ;iYnYnI9 zg>Jc_rOH;SwNNNXKFo9x_ms7npkbNXOmXbwMRY`O&`8!~JJW4T61{rZT$$lV5zJR{ zRR(fKTr>~b@b=PNsjDU1XD)F64<&4ZxoUN%NLzzv>keK^ zX%SIia}^Vj!JSQmDCu~uKr<4hEG=nWvlh%CXsR&HqPjK>i8rh%LDuCE!ONoo7RpmW zjl~q~l=Q(G+fg$0YV@KeJr*~U?Uts{R;J`KW`Piy5)GQN*kRD~IEABrjQ}xKE~<`ukcu7t;?66aeMlPg3d3QhJhqm8#0xp&!_qRz58sIOf`xpO*mehjXi z;%b7_H*7J%SA_bC;;Q#Mgfj#sR~=iJLHV*!&%kb3=cr|9*?<{)9POndSr&^HqRYsL zow}N_Wz)J)*$T1QX8TANHv5?y_ z8>dX?R6%4d?{U7u~WfPMd_xr&H+O$eF0CFS~<7alZOc!3E^(3 
zBF#7IY`tv6Hd5&ox{A5ljHYFx=w7H>ot98{vS3;Y*d2CPyuM<~$8C0#VV#?h85)pSgPsjzve!P~mJ<)8=)Rje1B#q)aal~NJ! z@M`^OJyc8&igr&$?<`gpDLYR4HBK_Yc@eXvR_m_i(*$K;e3D0sk-N&+h7255=3^Dg z+HjgW(?xnWDp9MLmpD@>L;4!6>0HQY_ZBP7Nr)ECDK;I+H6bifX?5nSk!(v|&KEbp z)+RJB3#k>&ay8v3)#6D!Bq1!Y=CiH2>O8U9D2aGcuoqc=KFO7|v$>Qwv#99#YDKQ; z*u<1A&rYQ$ic9kz($IveGYxY3swP@nicMB$QXV5#uOrhLEH+1E;EOqmiRk5AjZ0~5 z9T`6pYt$ko7q!CFE4)-Q6ON?ClTZ?wvfew5ms4eKk zD;yK1X~F_Ne3FzS37Hzuh8%~5T(qz>53Q!aXUD-# z2eU8PVQV(*nmI|cz(H&^>~}bK6g zKESk9b<5=iU>rZ&y8xy0vx?E5vzKb~RMi?uo*11rr?trnt*d&P*=k$X(x|NVRdz?sV zyEhW>&cuuf~5s^NxD?G#0o~m=aREc3r7gJ$(m}V980{NbU8Q} zg@o-2U&l)GnN6~+bt~u;VdPo?-8=#%F3fLHY&E}>J!{CwH7$lnjd%eTYCMO-SgM)U z2((}YUN|L`&lDJps^%N3FvPIBLXl((nN}V45i$-(2oKz9$Wf@4l+dct$#E@h9kq2R zXY>L_&^0MzuuXFLMoz$knm1}eQUk301xR;RlF$+<2CJ8N2c2voWK)mko6SnOPPA3e z68(JEgBi;(3on4nUb!Shvsyo2HOaIB40$WoVq~GFR4A97hh42EA&T}DOQb#vIjT-M z2)CqK4!n+zaL8PWZS|(frpH`?qCAqW^p@F4rB0G%sMcwXmaeX3Vlh_^T2YErEVUF1 zSr*XCOR+>VHRp`i=3B}9LJgx=>`R$i(fUqB%EnI#&HO5YYOeG!jT&mZXwY%R3GgP+ zqMBmN9MahZ!%2e?WoF7KE1ET212v6M+Ga_=K`}(~I-W5UtJrdNqk)QLUG9=xoVOYz zT`Z|vQ2b3x8X+9ZnabOK8WFNe>CJSzV5t|Y#5>3CH(&VL-E{WE%moIOT z9E&1Nyp$`hB2mjyB|USYIH>Q8wXo(Jk#rKuON(Zuq|FFc9qmIk1*B9ZQItV>!Pqcr zIU`<^GrD?RG$3mR-V+zs8G)Gh*ltNeIQV->3A9;1!vZ2NV7OaH+q5%G1rGcRa3WOHi3Z( z;EUBlttHa0&%k1xBO-LeNo!WL*{n$;ymlJqTd1jM)Q{*CPK?)0PHp^xK=V{R-Yst< zi`mjjgR$Gv7j*i(xuyh7?eMCdub6@5il2A;C0jOOAT{FDMqunrbvB8H$?braYRztA z9U4eVRKt(r{-#$+UrCcPJ-I(9zX}TIBa*Yv5H}o1dbvGOHXG|?i&Gqg}~&z z=7*OCcJ)&fuw4Tmzap?bQ^2Nf2ext+*q%R6LeReg`}4CcMS1vIA5a8=q@wztkage_ z2}t#Yyz9=}?+M)U#N)u%@c8)A0SE?V>%Mc$4UbFJrw_Om-tf8n7HISxKtBMa%JbNf zM;`~#T6zk^UjwAt|JAxvfM*+sL7>z)0BFr!@FqV(2>Q})00pVOmud{m;UK)nUk8w` z^9L0qFq5yz0)_%jfpJ{#qvEQ5mujrmeb6US^=VZ5s_j)$+p6tU|4TIwG`Lo#`k5Ml zC*yiRLFqcbq4k2>HiCEyAn-{50w#P&_3>1722Aj4ZB^e+Rpxbsz+O;{s`x4&m8Tk1 z-K%2)p9|J}EmeO~VD^W=l&q=qo_6sxN67 zDD4G6#{rRm{uD4&2l(WAGiU^?L68J*KfvD!kg7irsQuul4b-W6QDs-<{L(d{)ilU$ zSLgoT<42Bp{)aQ^UJV1zM zd3x{Bqwkle7wky@d1KrvZxE70T6$P*~7<<96vt(_~1R` zPd{?x$#Kx+#`x8J{LK&WQ`aE~Yy7_QU*LEA;bQ~OJpSm>N1qyhNLBNZV~5uj@zD6; zV^1G{Oa)W*rpl=9-SzRPd&Ii7=2dUs^*~&oV_2QnzV*U2IcV@4`d#Dh0By$~epnUY z3*(0$dGzSf@y8E8{m_wPoBDl-=fh7t^~dAKKhX0%p6cpf z_gY`y*Qy*4^q#)daj3GWd-R4Z>xqvX|Fdhdz-xL|_X7yT9`JLVU)S^X^{!T259{Oj z(pL3ZrOx@2Pme!y^ohftf9Q$FpFTeP-kSQT`Bk4i>eFX^53lpRKEFFaP5v6+>-%}F zIrvd|s_R;PK7EKMD7(IoRi2*(=akx9O{-&4u|S|y{5wINs-us>@84nBp_U(c>e1tm z46Kdj$m7R{!HCxJRXlZlocI8~`s@W^4gV*<1AqTls@p#Te@N}D%6}cd9h|=)yC(le zkWioNAgtljpiY%vZF_w^sg-I_^SAt|YlkY&`^T{LefWR*0etn$0AUUP*zdt#X7_vq z{+9RQuYLfZz6t;Q@4>GGogaZedIMh__df8A8$>tsqn>pjtjXU8>VB7g&i>^a+dr(I zk@w+$`vdsu*>D~ItG@?dr~URv;19nKUn9L=e)WvLj-UTM_{Y!TJJj{?{`q}x{iyo+ z>`nOU;!^bif=0b#+zIM_+xnL;S+;ywH=Ebim5TeT58$rv<#pWe{0`hV{(EinhjGVl z;F{(4^r6ZNt_*5elNVTyf2+I?nB5;C@1`5LpZfqVxPIKgy${s=Ha&dfmAxNbg_UW%Zt}!l{@lziW37 z&3^;wHcfQ0$cwID*S6#!`Wl;C*^?3)c49c}Tsa=(I{du%5>$S73@BahkOZMpi literal 0 HcmV?d00001 diff --git a/third_party/tflite-micro/tensorflow/lite/micro/examples/person_detection/Makefile.inc b/third_party/tflite-micro/tensorflow/lite/micro/examples/person_detection/Makefile.inc index 0cbca9e84..0be91588a 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/examples/person_detection/Makefile.inc +++ b/third_party/tflite-micro/tensorflow/lite/micro/examples/person_detection/Makefile.inc @@ -54,9 +54,13 @@ $(GENERATED_SRCS_DIR)$(TENSORFLOW_ROOT)tensorflow/lite/micro/models/person_detec #Find any platform - specific rules for this example. include $(wildcard $(TENSORFLOW_ROOT)tensorflow/lite/micro/examples/person_detection/*/Makefile.inc) -# Tests loading and running a vision model. 
-$(eval $(call microlite_test,person_detection_test,\
-$(person_detection_TEST_SRCS),$(person_detection_TEST_HDRS),$(person_detection_GENERATOR_INPUTS)))
+# TODO(b/268568089): This test takes a very long time to finish, which causes
+# the CI to run for a long time.
+ifneq ($(TARGET_ARCH), hifimini)
+  # Tests loading and running a vision model.
+  $(eval $(call microlite_test,person_detection_test,\
+  $(person_detection_TEST_SRCS),$(person_detection_TEST_HDRS),$(person_detection_GENERATOR_INPUTS)))
+endif
 
 # Tests the image provider module.
 $(eval $(call microlite_test,image_provider_test,\
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/activations_common.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/activations_common.cc
index 4403edc87..2ec3a1bf5 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/activations_common.cc
+++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/activations_common.cc
@@ -55,8 +55,8 @@ void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
                          ReluOpData* data) {
   float act_min = 0.0;
   float act_max = std::numeric_limits<float>::infinity();
-  double real_multiplier = static_cast<double>(input->params.scale) /
-                           static_cast<double>(output->params.scale);
+  double real_multiplier =
+      static_cast<double>(input->params.scale / output->params.scale);
 
   const RuntimeShape input_shape = GetTensorShape(input);
   const RuntimeShape output_shape = GetTensorShape(output);
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/ceil.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/ceil.cc
index a390a7355..dbcd57c2e 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/ceil.cc
+++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/ceil.cc
@@ -1,4 +1,4 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -21,9 +21,8 @@ limitations under the License.
 #include "tensorflow/lite/micro/kernels/kernel_util.h"
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace ceil {
+
+namespace {
 
 constexpr int kInputTensor = 0;
 constexpr int kOutputTensor = 0;
@@ -64,12 +63,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   return kTfLiteOk;
 }
 
-}  // namespace ceil
+
+}  // namespace
 
 TfLiteRegistration Register_CEIL() {
-  return tflite::micro::RegisterOp(nullptr, ceil::Prepare, ceil::Eval);
+  return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
 }
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/comparisons.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/comparisons.cc
index 597856cd2..31ab92591 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/comparisons.cc
+++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/comparisons.cc
@@ -1,4 +1,4 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -22,9 +22,7 @@ limitations under the License.
#include "tensorflow/lite/micro/micro_log.h" namespace tflite { -namespace ops { -namespace micro { -namespace comparisons { + namespace { struct OpData { @@ -530,8 +528,6 @@ TfLiteStatus LessEqualEval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -} // namespace - void* Init(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); return context->AllocatePersistentBuffer(context, sizeof(OpData)); @@ -581,38 +577,30 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -} // namespace comparisons +} // namespace TfLiteRegistration Register_EQUAL() { - return tflite::micro::RegisterOp(comparisons::Init, comparisons::Prepare, - comparisons::EqualEval); + return tflite::micro::RegisterOp(Init, Prepare, EqualEval); } TfLiteRegistration Register_NOT_EQUAL() { - return tflite::micro::RegisterOp(comparisons::Init, comparisons::Prepare, - comparisons::NotEqualEval); + return tflite::micro::RegisterOp(Init, Prepare, NotEqualEval); } TfLiteRegistration Register_GREATER() { - return tflite::micro::RegisterOp(comparisons::Init, comparisons::Prepare, - comparisons::GreaterEval); + return tflite::micro::RegisterOp(Init, Prepare, GreaterEval); } TfLiteRegistration Register_GREATER_EQUAL() { - return tflite::micro::RegisterOp(comparisons::Init, comparisons::Prepare, - comparisons::GreaterEqualEval); + return tflite::micro::RegisterOp(Init, Prepare, GreaterEqualEval); } TfLiteRegistration Register_LESS() { - return tflite::micro::RegisterOp(comparisons::Init, comparisons::Prepare, - comparisons::LessEval); + return tflite::micro::RegisterOp(Init, Prepare, LessEval); } TfLiteRegistration Register_LESS_EQUAL() { - return tflite::micro::RegisterOp(comparisons::Init, comparisons::Prepare, - comparisons::LessEqualEval); + return tflite::micro::RegisterOp(Init, Prepare, LessEqualEval); } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/concatenation.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/concatenation.cc index a3f1cc346..59157564e 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/concatenation.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/concatenation.cc @@ -1,4 +1,4 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -26,9 +26,8 @@ limitations under the License. 
#include "tensorflow/lite/micro/micro_log.h" namespace tflite { -namespace ops { -namespace micro { -namespace concatenation { + +namespace { constexpr int kMaxInputNum = 10; // Maximum number of input tensors constexpr int kOutputTensor = 0; @@ -251,13 +250,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -} // namespace concatenation +} // namespace TfLiteRegistration Register_CONCATENATION() { - return tflite::micro::RegisterOp(concatenation::Init, concatenation::Prepare, - concatenation::Eval); + return tflite::micro::RegisterOp(Init, Prepare, Eval); } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv.cc index 139eda7f5..163364372 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv.cc @@ -17,6 +17,7 @@ limitations under the License. #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" #include "tensorflow/lite/kernels/internal/reference/conv.h" #include "tensorflow/lite/kernels/internal/reference/integer_ops/conv.h" #include "tensorflow/lite/kernels/kernel_util.h" @@ -113,14 +114,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { case kTfLiteInt4: { int8_t* unpacked_filter_data = static_cast( context->GetScratchBuffer(context, data.filter_buffer_index)); - reference_integer_ops::ConvPerChannelWithPackedInt4Weights( + tflite::tensor_utils::UnpackDenseInt4IntoInt8( + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(filter).FlatSize(), + unpacked_filter_data); + reference_integer_ops::ConvPerChannel( ConvParamsQuantized(params, data), data.per_channel_output_multiplier, data.per_channel_output_shift, tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(filter), - tflite::micro::GetTensorData(filter), - unpacked_filter_data, tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorShape(filter), unpacked_filter_data, + tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output)); diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv_test.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv_test.cc index c5519b544..2eec76aec 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv_test.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/conv_test.cc @@ -61,6 +61,9 @@ static TfLiteConvParams common_conv_params = { TF_LITE_MICRO_TESTS_BEGIN +#if !defined(VISION_P6) // TODO(b/268384678): xtensa vision p6 kernels break + // this test, will if def till properly investigated. + TF_LITE_MICRO_TEST(SimpleTestQuantized4bitPerChannel) { const int output_dims_count = 12; int8_t output_data[output_dims_count]; @@ -90,6 +93,9 @@ TF_LITE_MICRO_TEST(SimpleTestQuantized4bitPerChannel) { &tflite::testing::common_conv_params, tflite::Register_CONV_2D(), output_data, kTfLiteInt4)); } + +#endif // !defined(VISION_P6) + #if !defined(XTENSA) // TODO(b/170321206): xtensa kernels are less general than // reference kernels and we ifdef out test cases that are // currently known to fail. 
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/depthwise_conv.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/depthwise_conv.cc index e872d4ac6..1bdb77cb9 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/depthwise_conv.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/depthwise_conv.cc @@ -17,6 +17,7 @@ limitations under the License. #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" #include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h" #include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h" #include "tensorflow/lite/kernels/kernel_util.h" @@ -66,31 +67,34 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } case kTfLiteInt8: { switch (filter->type) { - case kTfLiteInt8: { + case kTfLiteInt4: { + int8_t* unpacked_filter_data = static_cast( + context->GetScratchBuffer(context, data.filter_buffer_index)); + tflite::tensor_utils::UnpackDenseInt4IntoInt8( + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(filter).FlatSize(), + unpacked_filter_data); reference_integer_ops::DepthwiseConvPerChannel( DepthwiseConvParamsQuantized(params, data), data.per_channel_output_multiplier, data.per_channel_output_shift, tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(filter), - tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(filter), unpacked_filter_data, tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output)); break; } - case kTfLiteInt4: { - int8_t* unpacked_filter_data = static_cast( - context->GetScratchBuffer(context, data.filter_buffer_index)); - reference_integer_ops::DepthwiseConvPerChannelWithPackedInt4Weights( + case kTfLiteInt8: { + reference_integer_ops::DepthwiseConvPerChannel( DepthwiseConvParamsQuantized(params, data), data.per_channel_output_multiplier, data.per_channel_output_shift, tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), tflite::micro::GetTensorData(filter), - unpacked_filter_data, tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output)); diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/depthwise_conv_test.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/depthwise_conv_test.cc index e39f33d5e..bc958e0e3 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/depthwise_conv_test.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/depthwise_conv_test.cc @@ -1,3 +1,4 @@ + /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); @@ -24,15 +25,23 @@ namespace tflite { namespace testing { namespace { -#if !defined(XTENSA) // Needed to avoid build errors from unused variables. -constexpr int kMaxFilterChannels = 64; -constexpr int kMaxBiasChannels = 64; -#endif // !defined(XTENSA) - // Index of the output tensor in context->tensors, specific to // DepthwiseConv. constexpr int kOutputTensorIndex = 3; +// TODO(b/268384678): xtensa vision p6 kernels breaks int4 test +// due to recent added optimized kernel support to xtensa for int4. 
+// The corresponding test is disabled while investigation is being +// done. Corresponding variables used only in that test have to be +// if def'd out to avoid unused variable errors for vision p6. + +#if !defined(VISION_P6) + +constexpr int kMaxFilterChannels = 64; +constexpr int kMaxBiasChannels = 64; + +#endif // !defined(VISION_P6) + // Creates a DepthwiseConv opeerator, calls it with the provided input tensors // and some defaults parameters, and compares the output with // expected_output_data. @@ -79,33 +88,11 @@ TfLiteStatus ValidateDepthwiseConvGoldens( return kTfLiteOk; } -#if !defined(XTENSA) // Needed to avoid build errors from unsused functions. -void TestDepthwiseConvFloat(int* input_dims_data, const float* input_data, - int* filter_dims_data, const float* filter_data, - int* bias_dims_data, const float* bias_data, - const float* expected_output_data, - int* output_dims_data, - TfLiteDepthwiseConvParams* conv_params, - float* output_data) { - TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); - TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data); - TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); - TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); - const int output_dims_count = ElementCount(*output_dims); - - constexpr int inputs_size = 3; - constexpr int outputs_size = 1; - constexpr int tensors_size = inputs_size + outputs_size; - TfLiteTensor tensors[tensors_size] = { - CreateTensor(input_data, input_dims), - CreateTensor(filter_data, filter_dims), - CreateTensor(bias_data, bias_dims), - CreateTensor(output_data, output_dims), - }; - - ValidateDepthwiseConvGoldens(expected_output_data, output_dims_count, - conv_params, 1e-5, tensors_size, tensors); -} +// TODO(b/268384678): xtensa vision p6 kernels breaks int4 test +// due to recent added optimized kernel support to xtensa for int4. +// The corresponding test is disabled while this is investegated in +// order for the vision p6 nightly build to be green. +#if !defined(VISION_P6) void TestDepthwiseConvQuantizedPerChannel( int* input_dims_data, const float* input_data, int8_t* input_quantized, @@ -175,6 +162,39 @@ void TestDepthwiseConvQuantizedPerChannel( output_dims_count, conv_params, 1.0, tensors_size, tensors)); } +#endif // !defined(VISION_P6) + +// Xtensa kernels do not support float activations., and the corresponding tests +// are disabled. As a result, helper functions that are only needed for float +// kernel tests also need to be ifdef'd out to avoid build errors due to unused +// functions. 
+#if !defined(XTENSA) +void TestDepthwiseConvFloat(int* input_dims_data, const float* input_data, + int* filter_dims_data, const float* filter_data, + int* bias_dims_data, const float* bias_data, + const float* expected_output_data, + int* output_dims_data, + TfLiteDepthwiseConvParams* conv_params, + float* output_data) { + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data); + TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); + const int output_dims_count = ElementCount(*output_dims); + + constexpr int inputs_size = 3; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + CreateTensor(input_data, input_dims), + CreateTensor(filter_data, filter_dims), + CreateTensor(bias_data, bias_dims), + CreateTensor(output_data, output_dims), + }; + + ValidateDepthwiseConvGoldens(expected_output_data, output_dims_count, + conv_params, 1e-5, tensors_size, tensors); +} #endif // !defined(XTENSA) @@ -460,54 +480,6 @@ TF_LITE_MICRO_TEST(TestQuantizedPerChannelCompareWithFloat) { golden, output_dims, &conv_params, output_float); } -// Quantizing int8-ranged filter values down to int4 doesn't always yield the -// accuracy sufficient to meet the golden values. So this test was created by -// handcrafting filter values within the int4 range, and the golden data was -// obtained by running TestDepthwiseConvQuantizedPerChannel() with int8 -// quantization, and ensuring that int4 quantization yields the same outputs. -TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannelInt4Filter) { - const int input_elements = 12; - int input_shape[] = {4, 1, 3, 2, 2}; - const float input_values[] = {1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}; - const int filter_elements = 16; - int filter_shape[] = {4, 1, 2, 2, 4}; - const float filter_values[] = {1, 2, 3, 4, -5, 7, -6, 7, - 5, 6, 7, 4, 2, -5, 4, 0}; - const int bias_elements = 4; - int bias_shape[] = {4, 1, 1, 1, 4}; - const int output_elements = 8; - const float bias_values[] = {1, 2, 3, 4}; - const float golden[] = { - 0, 26, 29, 84, 6, 46, 45, 114, - }; - int output_shape[] = {4, 1, 2, 1, 4}; - const int output_dims_count = 8; - int8_t output_data[output_dims_count]; - - const float input_scale = 0.5; - const float output_scale = 1.0f; - const int input_zero_point = 0; - const int output_zero_point = 0; - - int8_t input_quantized[input_elements]; - int8_t filter_quantized[filter_elements]; - int32_t bias_quantized[bias_elements]; - int8_t golden_quantized[output_elements]; - - TfLiteDepthwiseConvParams conv_params; - conv_params.activation = kTfLiteActNone; - conv_params.dilation_width_factor = 1; - conv_params.dilation_height_factor = 1; - conv_params.stride_height = 1; - conv_params.stride_width = 1; - - tflite::testing::TestDepthwiseConvQuantizedPerChannel( - input_shape, input_values, input_quantized, input_scale, input_zero_point, - filter_shape, filter_values, filter_quantized, bias_shape, bias_values, - bias_quantized, output_shape, golden, golden_quantized, output_data, - output_scale, output_zero_point, &conv_params, kTfLiteInt4); -} - TF_LITE_MICRO_TEST(PerChannelBroadcastQuantizationParams) { const float input_scale = 1.0f; const float filter_scale = 1.0f; @@ -983,4 +955,58 @@ TF_LITE_MICRO_TEST(Int8Input32x1Filter32x1ShouldMatchGolden) { kQuantizationTolerance, kTensorsSize, tensors)); } +#if !defined(VISION_P6) +// 
TODO(b/268384678): xtensa vision p6 kernels break +// this test, will if def till properly investigated. + +// Quantizing int8-ranged filter values down to int4 doesn't always yield the +// accuracy sufficient to meet the golden values. So this test was created by +// handcrafting filter values within the int4 range, and the golden data was +// obtained by running TestDepthwiseConvQuantizedPerChannel() with int8 +// quantization, and ensuring that int4 quantization yields the same outputs. +TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannelInt4Filter) { + const int input_elements = 12; + int input_shape[] = {4, 1, 3, 2, 2}; + const float input_values[] = {1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}; + const int filter_elements = 16; + int filter_shape[] = {4, 1, 2, 2, 4}; + const float filter_values[] = {1, 2, 3, 4, -5, 7, -6, 7, + 5, 6, 7, 4, 2, -5, 4, 0}; + const int bias_elements = 4; + int bias_shape[] = {4, 1, 1, 1, 4}; + const int output_elements = 8; + const float bias_values[] = {1, 2, 3, 4}; + const float golden[] = { + 0, 26, 29, 84, 6, 46, 45, 114, + }; + int output_shape[] = {4, 1, 2, 1, 4}; + const int output_dims_count = 8; + int8_t output_data[output_dims_count]; + + const float input_scale = 0.5; + const float output_scale = 1.0f; + const int input_zero_point = 0; + const int output_zero_point = 0; + + int8_t input_quantized[input_elements]; + int8_t filter_quantized[filter_elements]; + int32_t bias_quantized[bias_elements]; + int8_t golden_quantized[output_elements]; + + TfLiteDepthwiseConvParams conv_params; + conv_params.activation = kTfLiteActNone; + conv_params.dilation_width_factor = 1; + conv_params.dilation_height_factor = 1; + conv_params.stride_height = 1; + conv_params.stride_width = 1; + + tflite::testing::TestDepthwiseConvQuantizedPerChannel( + input_shape, input_values, input_quantized, input_scale, input_zero_point, + filter_shape, filter_values, filter_quantized, bias_shape, bias_values, + bias_quantized, output_shape, golden, golden_quantized, output_data, + output_scale, output_zero_point, &conv_params, kTfLiteInt4); +} + +#endif // !defined(VISION_P6) + TF_LITE_MICRO_TESTS_END diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/elementwise.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/elementwise.cc index 81b27039f..35abc943c 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/elementwise.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/elementwise.cc @@ -354,6 +354,10 @@ TfLiteStatus RsqrtEval(TfLiteContext* context, TfLiteNode* node) { return EvalImplQuantized(context, node, elementwise::RsqrtEvalQuantized, elementwise::validate_input_func, type); + case kTfLiteInt16: + return EvalImplQuantized(context, node, + elementwise::RsqrtEvalQuantized, + elementwise::validate_input_func, type); default: MicroPrintf("Current data type %s is not supported.", diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/floor.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/floor.cc index 6b2a4cc25..207b5c4b4 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/floor.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/floor.cc @@ -1,4 +1,4 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -20,9 +20,8 @@ limitations under the License. 
#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { -namespace ops { -namespace micro { -namespace floor { + +namespace { constexpr int kInputTensor = 0; constexpr int kOutputTensor = 0; @@ -39,12 +38,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { tflite::micro::GetTensorData(output)); return kTfLiteOk; } -} // namespace floor + +} // namespace TfLiteRegistration Register_FLOOR() { - return tflite::micro::RegisterOp(nullptr, nullptr, floor::Eval); + return tflite::micro::RegisterOp(nullptr, nullptr, Eval); } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected.cc index a148ce448..0c6f241f2 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected.cc @@ -17,6 +17,7 @@ limitations under the License. #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" #include "tensorflow/lite/kernels/internal/reference/fully_connected.h" #include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h" #include "tensorflow/lite/micro/kernels/kernel_util.h" @@ -54,6 +55,16 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE(context, output != nullptr); TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); + if ((input->type == kTfLiteFloat32 && filter->type != kTfLiteFloat32) || + (input->type == kTfLiteInt8 && + (filter->type != kTfLiteInt8 && filter->type != kTfLiteInt4)) || + (input->type == kTfLiteInt16 && filter->type != kTfLiteInt8)) { + MicroPrintf("Input type: %s with filter type : %s not supported.", + TfLiteTypeGetName(input->type), + TfLiteTypeGetName(filter->type)); + return kTfLiteError; + } + if (filter->type == kTfLiteInt4) { int filter_size = RuntimeShape(filter->dims->size, @@ -113,29 +124,32 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { case kTfLiteInt8: { switch (filter->type) { - case kTfLiteInt8: { + case kTfLiteInt4: { + int8_t* unpacked_filter_data = static_cast( + context->GetScratchBuffer(context, data.filter_buffer_index)); + tflite::tensor_utils::UnpackDenseInt4IntoInt8( + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(filter).FlatSize(), + unpacked_filter_data); tflite::reference_integer_ops::FullyConnected( FullyConnectedParamsQuantized(data), tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(filter), - tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(filter), unpacked_filter_data, tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output)); break; } - case kTfLiteInt4: { - int8_t* unpacked_filter_data = static_cast( - context->GetScratchBuffer(context, data.filter_buffer_index)); - tflite::reference_integer_ops::FullyConnectedWithPackedInt4Weights( + case kTfLiteInt8: { + tflite::reference_integer_ops::FullyConnected( FullyConnectedParamsQuantized(data), tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), tflite::micro::GetTensorData(filter), - unpacked_filter_data, tflite::micro::GetTensorShape(bias), + 
tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output)); diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected.h index 2083c3fbb..7dc1ebb2e 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected.h @@ -73,7 +73,7 @@ TfLiteStatus CalculateOpDataFullyConnected( // (reference or optimized) must define this function. TfLiteRegistration Register_FULLY_CONNECTED(); -#if defined(CMSIS_NN) || defined(HEXAGON) +#if defined(CMSIS_NN) || defined(HEXAGON) || defined(XTENSA) // Returns a TfLiteRegistration struct for kernel variant that only supports // int8. TfLiteRegistration Register_FULLY_CONNECTED_INT8(); diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected_common.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected_common.cc index e7d0056c3..5a8d312de 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected_common.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/fully_connected_common.cc @@ -64,12 +64,13 @@ TfLiteStatus CalculateOpDataFullyConnected( QuantizeMultiplier(real_multiplier, &data->output_multiplier, &data->output_shift); - data->input_zero_point = input->params.zero_point; // Filter weights will always be symmetric quantized since we only support // int8 quantization. See // https://github.com/tensorflow/tensorflow/issues/44912 for additional // context. TFLITE_DCHECK(filter->params.zero_point == 0); + + data->input_zero_point = input->params.zero_point; data->filter_zero_point = filter->params.zero_point; data->output_zero_point = output->params.zero_point; diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/gather_nd.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/gather_nd.cc index 1f46dd1ef..3a02e8156 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/gather_nd.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/gather_nd.cc @@ -86,6 +86,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { // Assign to output the input type. output->type = params->type; + // The tensor output dims must be relocated + // from the FlatBuffer to the persistant storage arena. + TfLiteEvalTensor* output_eval = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + TF_LITE_ENSURE_OK(context, tflite::micro::CreateWritableTensorDimsWithCopy( + context, output, output_eval)); + // TFLM gather_nd does not create the output tensor, but it needs to ensure // that the output shape is correct. The result shape is // indices.shape[:-1] + params.shape[indices.shape[-1]:] diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/kernel_util.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/kernel_util.cc index 0499260f5..76031b872 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/kernel_util.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/kernel_util.cc @@ -16,6 +16,7 @@ limitations under the License. 
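
[Editor's aside] The gather_nd.cc hunk above restates the GATHER_ND shape rule: result shape = indices.shape[:-1] + params.shape[indices.shape[-1]:]. The small sketch below computes that shape for plain vectors; the function name and the worked example are illustrative and not taken from the kernel.

#include <cassert>
#include <vector>

// result = indices.shape[:-1] + params.shape[indices.shape[-1]:]
std::vector<int> GatherNdResultShape(const std::vector<int>& params_shape,
                                     const std::vector<int>& indices_shape) {
  assert(!indices_shape.empty());
  const int index_depth = indices_shape.back();
  assert(index_depth <= static_cast<int>(params_shape.size()));
  std::vector<int> result(indices_shape.begin(), indices_shape.end() - 1);
  result.insert(result.end(), params_shape.begin() + index_depth,
                params_shape.end());
  return result;
}

// Example: params shape [4, 3, 2], indices shape [2, 1]
//   -> index depth 1, result shape [2, 3, 2].
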
#include "tensorflow/lite/micro/kernels/kernel_util.h" #include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" #include "tensorflow/lite/micro/memory_helpers.h" #include "tensorflow/lite/micro/micro_log.h" @@ -256,5 +257,24 @@ TfLiteStatus CopySubgraphOutputsToOpOutputs(TfLiteContext* context, return kTfLiteOk; } +TfLiteEvalTensor MakeUnpackedInt4Tensor(TfLiteContext* context, + int scratch_buffer_index, + const TfLiteEvalTensor* tensor) { + if (tensor->type != kTfLiteInt4) { + return *tensor; + } + + TfLiteEvalTensor new_tensor; + new_tensor.data.data = static_cast( + context->GetScratchBuffer(context, scratch_buffer_index)); + new_tensor.dims = tensor->dims; + new_tensor.type = kTfLiteInt8; + tflite::tensor_utils::UnpackDenseInt4IntoInt8( + tflite::micro::GetTensorData(tensor), + tflite::micro::GetTensorShape(tensor).FlatSize(), + tflite::micro::GetTensorData(&new_tensor)); + return new_tensor; +} + } // namespace micro } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/kernel_util.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/kernel_util.h index aa369605e..f30ae44c3 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/kernel_util.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/kernel_util.h @@ -131,6 +131,14 @@ TfLiteStatus CopySubgraphOutputsToOpOutputs(TfLiteContext* context, MicroGraph* graph_info, int subgraph_idx); +// If tensor is INT4, make a new TfLiteEvalTensor with data unpacked into +// a scratch buffer. The returned tensor will have the kTfLiteInt8 type. +// Assume scratch buffer is previously requested in Prepare, and +// scratch_buffer_index can be used to retrieve that buffer. +// If the tensor is not INT4, a shallow copy is returned. +TfLiteEvalTensor MakeUnpackedInt4Tensor(TfLiteContext* context, + int scratch_buffer_index, + const TfLiteEvalTensor* tensor); } // namespace micro } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/l2norm.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/l2norm.cc index 5adea8e29..97f372aa2 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/l2norm.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/l2norm.cc @@ -1,4 +1,4 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -22,9 +22,6 @@ limitations under the License. 
#include "tensorflow/lite/micro/micro_log.h" namespace tflite { -namespace ops { -namespace micro { -namespace l2norm { namespace { @@ -37,8 +34,6 @@ enum KernelType { constexpr int kInputTensor = 0; constexpr int kOutputTensor = 0; -} // namespace - TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TFLITE_DCHECK(node->user_data != nullptr); TFLITE_DCHECK(node->builtin_data != nullptr); @@ -135,14 +130,12 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -} // namespace l2norm +} // namespace TfLiteRegistration Register_L2NORM_REF() { - return tflite::micro::RegisterOp(l2norm::Init, l2norm::Prepare, l2norm::Eval); + return tflite::micro::RegisterOp(Init, Prepare, Eval); } TfLiteRegistration Register_L2_NORMALIZATION() { return Register_L2NORM_REF(); } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/leaky_relu_common.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/leaky_relu_common.cc index 7d3cb176f..3d1ffebb6 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/leaky_relu_common.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/leaky_relu_common.cc @@ -51,16 +51,15 @@ TfLiteStatus CalculateOpDataLeakyRelu(TfLiteContext* context, data->output_zero_point = output->params.zero_point; int output_shift_alpha; - double alpha_multiplier = static_cast(input->params.scale) * - static_cast(params->alpha) / - static_cast(output->params.scale); + double alpha_multiplier = static_cast( + input->params.scale * params->alpha / output->params.scale); QuantizeMultiplier(alpha_multiplier, &data->output_multiplier_alpha, &output_shift_alpha); data->output_shift_alpha = static_cast(output_shift_alpha); int output_shift_identity; - double identity_multiplier = static_cast(input->params.scale) / - static_cast(output->params.scale); + double identity_multiplier = + static_cast(input->params.scale / output->params.scale); QuantizeMultiplier(identity_multiplier, &data->output_multiplier_identity, &output_shift_identity); data->output_shift_identity = static_cast(output_shift_identity); diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval.cc index 555ecd724..93d6bc7e4 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval.cc @@ -1,4 +1,4 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,1446 +14,282 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/lite/micro/kernels/lstm_eval.h" -#include -#include -#include -#include +#include -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" +#include "tensorflow/lite/kernels/internal/reference/fully_connected.h" +#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h" #include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h" +#include "tensorflow/lite/kernels/internal/reference/integer_ops/mul.h" #include "tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/op_macros.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/micro_tensor_utils.h" +#include "tensorflow/lite/kernels/internal/reference/logistic.h" +#include "tensorflow/lite/kernels/internal/reference/mul.h" +#include "tensorflow/lite/kernels/internal/reference/tanh.h" +#include "tensorflow/lite/kernels/internal/types.h" namespace tflite { -namespace lstm_internal { -// Calculates a single LSTM gate. -// -// Implements the following formula: (* is matrix multiply) -// gate = activate(W_input * input + W_aux * aux_input + -// W_peephole * cell + W_recurrent * prev_output + bias) -// with layer norm: -// gate = activate(W_norm * normalize(...) + bias) // not adding bias inside -// -// Activation is sigmoid except for the "cell" gate (configurable, usually tanh) -// -// Parameters: -// Input vectors (to LSTM): | Size: | Optional? -// input | n_input | -// aux_input | n_aux_input | y (bidir LSTM) -// Input vectors (persistent states): -// output_state | n_output | -// cell_state | n_cell | -// 'Constant' inputs: -// input_to_gate_weights | n_cell * n_input | -// aux_input_to_gate_weights | n_cell * n_aux_input | y (bidir LSTM) -// recurrent_to_gate_weights | n_cell * n_output | -// cell_to_gate_weights | n_cell | y (peephole) -// gate_bias | n_cell | -// layer_norm_coefficients | n_cell | y (layer norm) -// Output vector: -// gate | n_cell | -// Scalar parameters: -// n_batch - batch size / number of vectors -// n_input, n_aux_input, n_output, n_cell - size of vectors. -// activation - activation to use. -// is_input_all_zeros, is_aux_input_all_zeros - if input vectors are all zero. -// use_layer_norm - if doing layer norm LSTM. -void CalculateLstmGateFloat( - const float* input, const float* input_to_gate_weights, - const float* aux_input, const float* aux_input_to_gate_weights, - const float* output_state, const float* recurrent_to_gate_weights, - const float* cell_state, const float* cell_to_gate_weights, - const float* layer_norm_coefficients, const float* gate_bias, - const int n_batch, const int n_input, const int n_aux_input, - const int n_output, const int n_cell, - const TfLiteFusedActivation activation, float* gate, - const bool is_input_all_zeros, const bool is_aux_input_all_zeros) { - const bool use_peephole = (cell_to_gate_weights != nullptr); - const bool use_layer_norm = (layer_norm_coefficients != nullptr); - // Initialize scratch buffers with bias for regular lstm or initialize with - // zero for layer norm lstm. 
- if (use_layer_norm) { - memset(gate, 0, n_cell * n_batch * sizeof(float)); - } else { - tflite::tensor_utils::VectorBatchVectorAssign(gate_bias, n_cell, n_batch, - gate); - } - // For each batch and cell: compute input_weight * input. - // Skip if input is all zeros. - if (!is_input_all_zeros) { - tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_to_gate_weights, n_cell, n_input, input, n_batch, gate); - } - // For each batch and cell: compute aux_input_weight * aux_input. - // Skip if auxiliary input is not available or all zeros. - if (!is_aux_input_all_zeros) { - tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate( - aux_input_to_gate_weights, n_cell, n_aux_input, aux_input, n_batch, - gate); - } - // For each batch and cell: compute recurrent_weight * output_state. - tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate( - recurrent_to_gate_weights, n_cell, n_output, output_state, n_batch, gate); - // For each batch and cell: compute cell_weight .* cell_state (peephole LSTM) - if (use_peephole) { - tflite::tensor_utils::VectorBatchVectorCwiseProductAccumulate( - cell_to_gate_weights, n_cell, cell_state, n_batch, gate); +LstmTensors::LstmTensors(TfLiteContext* context, TfLiteNode* node) { + micro_context_ = GetMicroContext(context); + // 24 internal tensors. see lstm_shared.h for tensor names + for (size_t i = 0; i < 24; i++) { + internal_tensors_[i] = micro_context_->AllocateTempInputTensor(node, i); } - // Do layer normalization (if layer norm LSTM) - if (use_layer_norm) { - tflite::tensor_utils::MeanStddevNormalization(gate, gate, n_cell, n_batch); - tflite::tensor_utils::VectorBatchVectorCwiseProduct( - layer_norm_coefficients, n_cell, gate, n_batch, gate); - tflite::tensor_utils::VectorBatchVectorAdd(gate_bias, n_cell, n_batch, - gate); - } - // Apply activation - tflite::PortableApplyActivationToVector(gate, n_batch * n_cell, activation, - gate); + output_tensor_ = + micro_context_->AllocateTempOutputTensor(node, kLstmOutputTensor); } -// Updates the LSTM cell state, used by both float and hybrid LSTM versions. -// -// Implements the following formula: -// cell_state_new = clip(forget_gate * cell_state + input_gate * cell_gate) -// -// With CIFG LSTM, input gate is replaced by (1-forget_gate). -// -// Parameters: -// - n_batch, n_cell: sizes of vectors -// - cell_state: input/output vector, size n_batch*n_cell -// - input_gate: input vector, size n_batch*n_cell. -// - forget_gate: input/scratch vector, size n_batch*n_cell, modified with CIFG -// - cell_gate: input vector, size n_batch*n_cell. -// - use_cifg: use 1-forget_gate instead of input_gate. -// - clip: if > 0, clip the resulting cell state to [-clip, +clip]. -void UpdateLstmCellFloat(int n_batch, int n_cell, float* cell_state, - const float* input_gate, float* forget_gate, - const float* cell_gate, bool use_cifg, float clip) { - tflite::tensor_utils::VectorVectorCwiseProduct(forget_gate, cell_state, - n_batch * n_cell, cell_state); - - if (use_cifg) { - // With CIFG, input_gate = 1-forget_gate. Use the forget_gate array as - // scratch, as input_gate array is not allocated in this case. (Be careful - // not to write to the scratch before reading the forget gate data.) 
- float* scratch = forget_gate; - tflite::tensor_utils::Sub1Vector(forget_gate, n_batch * n_cell, scratch); - tflite::tensor_utils::VectorVectorCwiseProductAccumulate( - cell_gate, scratch, n_batch * n_cell, cell_state); - } else { - tflite::tensor_utils::VectorVectorCwiseProductAccumulate( - cell_gate, input_gate, n_batch * n_cell, cell_state); - } - if (clip > 0.0f) { - tflite::tensor_utils::CwiseClipping(cell_state, n_batch * n_cell, clip); - } -} - -// Calculates the output state tensor of an LSTM step. -// -// Implements the following formula: -// output_no_projection = output_gate .* activate(cell_state) -// (elementwise vector product) -// If no projection is used: -// output = output_state = output_no_projection -// With projection: -// output = output_state = clip(W*output_no_projection + bias) -// -// Output might not have a different 'stride' than n_batch, so we need to copy. -// -// Parameters: -// - n_batch: batches: the number of distinct vectors in each array. -// - n_cell, n_output: sizes of vectors. -// - cell_state, output_gate: input vectors, size n_batch*n_cell. -// - projection_weights, projection_weights_scale, projection_bias: -// constant inputs, describing projection matrix and bias. -// - proj_clip: if > 0, clip the output of the projection. -// - output_state: output vector, size n_batch*n_output. Must be contigous. -// - scratch: scratch area, size n_batch*n_cell. -void CalculateLstmOutputFloat(int n_batch, int n_cell, int n_output, - const float* cell_state, const float* output_gate, - TfLiteFusedActivation activation, - const float* projection_weights, - const float* projection_bias, - const float proj_clip, float* output_state, - float* scratch) { - tflite::PortableApplyActivationToVector(cell_state, n_batch * n_cell, - activation, scratch); - tflite::tensor_utils::VectorVectorCwiseProduct(output_gate, scratch, - n_batch * n_cell, scratch); - - const bool use_projection = (projection_weights != nullptr); - const bool use_projection_bias = (projection_bias != nullptr); - - if (use_projection) { - if (use_projection_bias) { - tflite::tensor_utils::VectorBatchVectorAssign(projection_bias, n_output, - n_batch, output_state); - } else { - memset(output_state, 0, n_batch * n_output * sizeof(float)); +LstmTensors::~LstmTensors() { + for (size_t i = 0; i < 24; i++) { + if (internal_tensors_[i] != nullptr) { + micro_context_->DeallocateTempTfLiteTensor(internal_tensors_[i]); } - tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate( - projection_weights, n_output, n_cell, scratch, n_batch, output_state); - if (proj_clip > 0.0f) { - tflite::tensor_utils::CwiseClipping(output_state, n_batch * n_output, - proj_clip); - } - } else { - std::memcpy(output_state, scratch, n_batch * n_output * sizeof(float)); } + micro_context_->DeallocateTempTfLiteTensor(output_tensor_); } -// Calculates a single LSTM gate, int8x8_16 version. -// Implements the same functionality as CalculateLstmGateFloat. 
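
[Editor's aside] Without projection, the output-state calculation in CalculateLstmOutputFloat above reduces to output = output_gate .* activate(cell_state). A minimal float rendering of that elementwise product (projection and clipping omitted, tanh assumed as the activation):

#include <cmath>
#include <vector>

// h = output_gate .* tanh(cell_state); without projection this is the new
// output/hidden state directly.
std::vector<float> LstmOutputNoProjection(
    const std::vector<float>& cell_state,
    const std::vector<float>& output_gate) {
  std::vector<float> h(cell_state.size());
  for (size_t i = 0; i < cell_state.size(); ++i) {
    h[i] = output_gate[i] * std::tanh(cell_state[i]);
  }
  return h;
}
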
-void CalculateLstmGateInteger8x8_16( - // Input and weights - const int8_t* input, const int8_t* input_to_gate_weights, - const int32_t* input_to_gate_bias, const int32_t input_to_gate_scale_a, - const int32_t input_to_gate_scale_b, - // Output state and weights - const int8_t* output_state, const int8_t* recurrent_to_gate_weights, - const int32_t* recurrent_to_gate_bias, - const int32_t recurrent_to_gate_scale_a, - const int32_t recurrent_to_gate_scale_b, - // Cell state and weights - const int16_t* cell_state, const int16_t* cell_to_gate_weights, - const int32_t cell_to_gate_scale_a, const int32_t cell_to_gate_scale_b, - // Layer normalization parameters (layer norm LSTM) - const int16_t* layer_norm_coefficients, const int32_t* layer_norm_bias, - const int32_t layer_norm_input_scale_a, - const int32_t layer_norm_input_scale_b, - const int32_t layer_norm_variance_guard, - // Array sizes - const int n_batch, const int n_input, const int n_output, const int n_cell, - const TfLiteFusedActivation activation, - // Output - int16_t* gate, - // Parameters for performance optimizations - // Scratch arrays - int32_t* scratch5) { - const bool use_peephole = (cell_to_gate_weights != nullptr); - const bool use_layer_norm = (layer_norm_coefficients != nullptr); - - // Initialize scratch buffers with zeros. Note that unlike float and hybrid - // versions, bias is only used in layer normalization. - memset(gate, 0, n_batch * n_cell * sizeof(int16_t)); - // For each batch and cell: compute input_weight * input. - tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input, input_to_gate_bias, input_to_gate_weights, input_to_gate_scale_a, - input_to_gate_scale_b, n_batch, n_input, n_cell, 0, scratch5, gate, - nullptr); - // Note: no aux_input. - // For each batch and cell: compute recurrent_weight * output_state. - tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate( - output_state, recurrent_to_gate_bias, recurrent_to_gate_weights, - recurrent_to_gate_scale_a, recurrent_to_gate_scale_b, n_batch, n_output, - n_cell, 0, scratch5, gate, nullptr); - // For each batch and cell: compute cell_weight * cell_state (peephole LSTM) - if (use_peephole) { - tflite::tensor_utils::VectorBatchVectorCwiseProductAccumulate( - cell_to_gate_weights, n_output, cell_state, n_batch, - cell_to_gate_scale_a, cell_to_gate_scale_b, gate); - } - // Do layer normalization (if layer norm LSTM) - if (use_layer_norm) { - tflite::tensor_utils::ApplyLayerNorm( - gate, layer_norm_coefficients, layer_norm_bias, - layer_norm_input_scale_a, layer_norm_input_scale_b, - layer_norm_variance_guard, n_batch, n_cell, gate); +// Verify the LSTM internal tensor properties (e.g., type checks) +// Input/output/states/fc weights tensors are required for kernel evaulation. +// The state tensors should be variables. 
Variants of the standard LSTM +// are not supported here, therefore their corresponding tensors should be +// invalid +TfLiteStatus LstmTensors::ValidateTensorStatus(TfLiteContext* context) const { + // Verify certain tensor properties + // input tensor + TF_LITE_ENSURE(context, internal_tensors_[kLstmInputTensor] != nullptr); + // hidden state + TF_LITE_ENSURE(context, internal_tensors_[kLstmOutputStateTensor] != nullptr); + TF_LITE_ENSURE(context, + internal_tensors_[kLstmOutputStateTensor]->is_variable); + // hidden state becomes input so they must have the same type + TF_LITE_ENSURE_EQ(context, internal_tensors_[kLstmOutputStateTensor]->type, + internal_tensors_[kLstmInputTensor]->type); + // cell state + TF_LITE_ENSURE(context, internal_tensors_[kLstmCellStateTensor] != nullptr); + TF_LITE_ENSURE(context, internal_tensors_[kLstmCellStateTensor]->is_variable); + // output + TF_LITE_ENSURE(context, output_tensor_ != nullptr); + // output type is the same as the input type (activations) + TF_LITE_ENSURE_EQ(context, output_tensor_->type, + internal_tensors_[kLstmInputTensor]->type); + + // weight tensors (1-9, see lstm_shared for index definition) + const auto weight_type = + internal_tensors_[kLstmInputToForgetWeightsTensor]->type; + for (size_t i = 1; i < 9; i++) { + TF_LITE_ENSURE(context, internal_tensors_[i] != nullptr); + TF_LITE_ENSURE_EQ(context, internal_tensors_[i]->type, weight_type); } - // Apply activation - switch (activation) { - case kTfLiteActSigmoid: - - reference_integer_ops::Logistic( - 0 /*data->input_multiplier*/, 0 /*data->input_left_shift */, - n_batch * n_cell /*NumElements(input->dims)*/, - gate /* tflite::micro::GetTensorData(input) */, - gate /*tflite::micro::GetTensorData(output) */); - - break; - case kTfLiteActTanh: { - int32_t dims_data = n_batch * n_cell; - RuntimeShape tanh_inp_shape = RuntimeShape(1, &dims_data); - reference_integer_ops::Tanh(0, 0, tanh_inp_shape, gate, tanh_inp_shape, - gate); - } break; - default: - // Only Sigmoid or Tanh is used. - TFLITE_ASSERT_FALSE; + // bias tensors (12-15, see lstm_shared for index definition) + const auto bias_type = internal_tensors_[kLstmForgetGateBiasTensor]->type; + for (size_t i = 12; i < 16; i++) { + TF_LITE_ENSURE(context, internal_tensors_[i] != nullptr); + TF_LITE_ENSURE_EQ(context, internal_tensors_[i]->type, bias_type); } -} - -// Updates the LSTM cell state, used by both integer LSTM versions. -// Also see UpdateLstmCellFloat. -// -// Parameters: -// - n_batch, n_cell: sizes of vectors -// - cell_state: input/output vector, size n_batch*n_cell -// - cell_state_scale: scaling factor of cell state. -// - input_gate: input vector, size n_batch*n_cell. -// - forget_gate: input/scratch vector, size n_batch*n_cell, always modified. -// - cell_gate: input vector, size n_batch*n_cell. -// - use_cifg: use 1-forget_gate instead of input_gate. -// - clip: if > 0, clip the resulting cell state to [-clip, +clip]. -void UpdateLstmCellInteger(int n_batch, int n_cell, int16_t* cell_state, - int32_t cell_state_scale, const int16_t* input_gate, - int16_t* forget_gate, const int16_t* cell_gate, - bool use_cifg, int16_t clip) { - // Use the forget_gate array as scratch, as input_gate array is not allocated - // in CIFG case. (Be careful not to write to the scratch before reading the - // forget gate data.) 
- int16_t* scratch = forget_gate; - - tflite::tensor_utils::CwiseMul(forget_gate, cell_state, n_batch, n_cell, 15, - cell_state); - if (use_cifg) { - tflite::tensor_utils::Sub1Vector(forget_gate, n_batch * n_cell, scratch); - tflite::tensor_utils::CwiseMul(scratch, cell_gate, n_batch, n_cell, - 30 + cell_state_scale, scratch); - } else { - tflite::tensor_utils::CwiseMul(input_gate, cell_gate, n_batch, n_cell, - 30 + cell_state_scale, scratch); + // Tensors from LSTM variants are invalid + // No peephole + for (size_t i = 9; i < 12; i++) { + TF_LITE_ENSURE(context, internal_tensors_[i] == nullptr); } - tflite::tensor_utils::CwiseAdd(cell_state, scratch, n_batch, n_cell, - cell_state); - - if (clip > 0) { - tflite::tensor_utils::CwiseClipping(cell_state, n_batch * n_cell, clip); + // No projection + for (size_t i = 16; i < 18; i++) { + TF_LITE_ENSURE(context, internal_tensors_[i] == nullptr); } + // No internal layer norm + for (size_t i = 20; i < 24; i++) { + TF_LITE_ENSURE(context, internal_tensors_[i] == nullptr); + } + return kTfLiteOk; } -// Calculates the output state tensor of an LSTM step. See Float and hybrid -// versions as well. -// -// Parameters: -// - n_batch: batches: the number of distinct vectors in each array. -// - n_cell, n_output: sizes of vectors. -// - cell_state, output_gate: input vectors, size n_batch*n_cell. -// - cell_state_scale: scaling of cell_state. -// - hidden_scale_[a|b]: effective scale of cell_state.*output_gate -// - hidden_zp: zero_point for cell_state.*output_gate -// - projection_weights, proj_scale_[a|b], projection_bias: -// constant inputs, describing projection matrix and bias. -// - output_state_zp: zero point of output_state. (Input, calibrated value.) -// - quantized_proj_clip: if > 0, clip the output of the projection. -// - output_state: output vector, size n_batch*n_output. Must be contigous. -// - scratch0: scratch area of size n_batch*n_cell -// - scratch1: scratch area of size n_batch*n_cell -// - scratch2: scratch area used by MatrixBatchVectorMultiplyAccumulate -void CalculateLstmOutputInteger8x8_16( - int n_batch, int n_cell, int n_output, int16_t* cell_state, - int32_t cell_state_scale, const int16_t* output_gate, - int32_t hidden_scale_a, int32_t hidden_scale_b, int32_t hidden_zp, - const int8_t* projection_weights, int32_t proj_scale_a, - int32_t proj_scale_b, const int32_t* projection_bias, - int32_t output_state_zp, int8_t quantized_proj_clip, int8_t* output_state, - int16_t* scratch0, int8_t* scratch1, int32_t* scratch2) { - // Note: unlike float/hybrid, the activation is always Tanh. 
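
[Editor's aside] The integer cell update removed above follows the same arithmetic the float comments spell out: cell_state_new = clip(forget_gate .* cell_state + input_gate .* cell_gate), with (1 - forget_gate) standing in for the input gate under CIFG. A minimal float sketch of that formula, for orientation only:

#include <algorithm>
#include <vector>

// cell_state <- clip(forget_gate .* cell_state + input_gate .* cell_gate)
void UpdateCellStateFloat(std::vector<float>& cell_state,
                          const std::vector<float>& input_gate,
                          const std::vector<float>& forget_gate,
                          const std::vector<float>& cell_gate, float clip) {
  for (size_t i = 0; i < cell_state.size(); ++i) {
    float c = forget_gate[i] * cell_state[i] + input_gate[i] * cell_gate[i];
    if (clip > 0.0f) c = std::max(-clip, std::min(clip, c));
    cell_state[i] = c;
  }
}
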
+namespace lstm_internal { - { - int32_t tanh_input_left_shift = (15 + cell_state_scale) - 3; - int32_t dims_data = n_batch * n_cell; - if (tanh_input_left_shift < 0) /* handling negative shift value */ - { - int32_t i; - tanh_input_left_shift = -tanh_input_left_shift; - for (i = 0; i < dims_data; i++) { - cell_state[i] = cell_state[i] >> tanh_input_left_shift; - } - tanh_input_left_shift = 0; +const int32_t kInt16Max = std::numeric_limits::max(); +const int32_t kInt16Min = std::numeric_limits::min(); + +void AddElementWise(const int16_t* input_1, const int16_t* input_2, int n_batch, + int n_input, int16_t* output) { + for (int batch = 0; batch < n_batch; ++batch) { + for (int i = 0; i < n_input; ++i) { + const int index = batch * n_input + i; + int32_t sum = input_1[index] + input_2[index]; + const int32_t sum_clamped = std::min(kInt16Max, std::max(kInt16Min, sum)); + output[index] = static_cast(sum_clamped); } - RuntimeShape tanh_inp_shape = RuntimeShape(1, &dims_data); - reference_integer_ops::Tanh(0, tanh_input_left_shift, tanh_inp_shape, - cell_state, tanh_inp_shape, scratch0); } - tflite::tensor_utils::CwiseMul(output_gate, scratch0, hidden_scale_a, - hidden_scale_b, n_batch, n_cell, hidden_zp, - scratch1); - - const bool use_projection = (projection_weights != nullptr); +} - if (use_projection) { - // Note: no bias like in float/hybrid - memset(output_state, 0, n_batch * n_output * sizeof(int8_t)); - tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate( - scratch1, projection_bias, projection_weights, proj_scale_a, - proj_scale_b, n_batch, n_cell, n_output, output_state_zp, scratch2, - output_state, nullptr); - if (quantized_proj_clip > 0) { - tflite::tensor_utils::CwiseClipping(output_state, n_batch * n_output, - quantized_proj_clip); +void AddElementWise(const float* input_1, const float* input_2, int n_batch, + int n_input, float* output) { + for (int batch = 0; batch < n_batch; ++batch) { + for (int i = 0; i < n_input; ++i) { + const int index = batch * n_input + i; + output[index] = input_1[index] + input_2[index]; } - } else { - std::memcpy(output_state, scratch1, n_batch * n_output * sizeof(int8_t)); } } -// Performs an LSTM batch inference step for input specified by input_ptr. -// The LSTM cell is specified by the pointers to its weights (*_weights_ptr) and -// biases (*_bias_ptr), and buffers (*_scratch), along with additional -// parameters: -// - params: various LSTM params including activation, clipping, etc., -// - n_batch: size of batch, -// - n_cell: number of cells (or units), -// - n_input: the input size, -// - n_aux_input: the auxiliary input size. -// - n_output: the output size. -// - output_batch_leading_dim: the leading dimension of the output buffer. 
-// -// Input of size 'n_batch * n_input': -// input_ptr -// Input of size 'n_batch * n_aux_input': -// aux_input_ptr - optional (can be nullptr) -// -// LSTM weights: -// Input weights of size 'n_cell * n_input': -// input_to_input_weights - optional -// input_to_forget_weights -// input_to_cell_weights -// input_to_output_weights -// Auxiliary input weights of size 'n_cell * n_aux_input': -// aux_input_to_input_weights - optional -// aux_input_to_forget_weights - optional -// aux_input_to_cell_weights - optional -// aux_input_to_output_weights - optional -// Recurrent weights of size 'n_cell * n_output': -// recurrent_to_input_weights - optional -// recurrent_to_forget_weights -// recurrent_to_cell_weights -// recurrent_to_input_weights -// Peephole weights of size 'n_cell', representing diagonal matrices. -// cell_to_input_weights - optional -// cell_to_cell_weights - optional -// cell_to_output_weights - optional -// Projection weights of size 'n_output * n_cell' -// projection_weights_ptr - optional -// Gate biases of size 'n_cell': -// input_gate_bias_ptr - optional -// forget_gate_bias_ptr -// cell_gate_bias_ptr -// output_gate_bias_ptr -// -// Layer norm coefficients of size 'n_cell', representing diagonal matrices. -// input_layer_norm_coefficients_ptr - optional -// forget_layer_norm_coefficients_ptr - optional -// cell_layer_norm_coefficients_ptr - optional -// output_layer_norm_coefficients_ptr - optional -// -// The pointers to the cell and output state and the output are updated. -// -// The pointers input_ptr, aux_input_ptr, and output_ptr point to data aligned -// in batch_major order, and each step processes batch_size many inputs from -// input_ptr, and updates batch_size many cell and output states. -// -// The output_batch_dim is output.shape[-1], i.e. the outermost dimension of the -// output tensor, and in most cases will be equal to n_output. It is usually not -// when we want to store the LSTM output into a slice of the output tensor, e.g. -// for bidirectional LSTMs with merge_outputs. In this case, the batched -// operations cannot be used since they assume that the batched outputs are -// contiguous, and we manually loop over the batched outputs. 
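
[Editor's aside] The removed step function below stitches the per-gate helpers together. For orientation, a single gate in the float path reduces to gate = sigmoid(W_input * x + W_recurrent * h + bias), with tanh replacing sigmoid for the cell gate. The standalone sketch below computes one such gate for a single batch entry; it illustrates the formula and is not the removed helper.

#include <cmath>
#include <vector>

// One LSTM gate for one batch entry:
// gate[c] = sigmoid(sum_i W_in[c][i]*x[i] + sum_o W_rec[c][o]*h[o] + b[c])
std::vector<float> LstmGateFloat(
    const std::vector<float>& x, const std::vector<float>& h,
    const std::vector<std::vector<float>>& w_input,
    const std::vector<std::vector<float>>& w_recurrent,
    const std::vector<float>& bias) {
  const size_t n_cell = bias.size();
  std::vector<float> gate(n_cell);
  for (size_t c = 0; c < n_cell; ++c) {
    float acc = bias[c];
    for (size_t i = 0; i < x.size(); ++i) acc += w_input[c][i] * x[i];
    for (size_t o = 0; o < h.size(); ++o) acc += w_recurrent[c][o] * h[o];
    gate[c] = 1.0f / (1.0f + std::exp(-acc));  // sigmoid activation
  }
  return gate;
}
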
-void LstmStepFloat( - const float* input_ptr, const float* input_to_input_weights_ptr, - const float* input_to_forget_weights_ptr, - const float* input_to_cell_weights_ptr, - const float* input_to_output_weights_ptr, const float* aux_input_ptr, - const float* aux_input_to_input_weights_ptr, - const float* aux_input_to_forget_weights_ptr, - const float* aux_input_to_cell_weights_ptr, - const float* aux_input_to_output_weights_ptr, - const float* recurrent_to_input_weights_ptr, - const float* recurrent_to_forget_weights_ptr, - const float* recurrent_to_cell_weights_ptr, - const float* recurrent_to_output_weights_ptr, - const float* cell_to_input_weights_ptr, - const float* cell_to_forget_weights_ptr, - const float* cell_to_output_weights_ptr, - const float* input_layer_norm_coefficients_ptr, - const float* forget_layer_norm_coefficients_ptr, - const float* cell_layer_norm_coefficients_ptr, - const float* output_layer_norm_coefficients_ptr, - const float* input_gate_bias_ptr, const float* forget_gate_bias_ptr, - const float* cell_gate_bias_ptr, const float* output_gate_bias_ptr, - const float* projection_weights_ptr, const float* projection_bias_ptr, - const TfLiteLSTMParams* params, int n_batch, int n_cell, int n_input, - int n_aux_input, int n_output, int output_batch_leading_dim, - float* output_state_ptr, float* cell_state_ptr, float* scratch0, - float* scratch1, float* scratch2, float* scratch3, float* output_ptr) { - // Since we have already checked that weights are all there or none, we can - // check the existence of only one to the get the condition. - const bool use_cifg = (input_to_input_weights_ptr == nullptr); +void Sigmoid(const RuntimeShape& data_shape, int16_t* data) { + reference_integer_ops::Logistic( + 0 /*data->input_multiplier*/, 0 /*data->input_left_shift */, + data_shape.FlatSize() /*NumElements(input->dims)*/, + data /* tflite::micro::GetTensorData(input) */, + data /*tflite::micro::GetTensorData(output) */); +} - // Make named scratch buffers. - float* input_gate_scratch = scratch0; - float* forget_gate_scratch = scratch1; - float* cell_gate_scratch = scratch2; - float* output_gate_scratch = scratch3; +void Sigmoid(const RuntimeShape& data_shape, float* data) { + reference_ops::Logistic(data_shape, data, data_shape, data); +} - // Check if inputs are all zeros so we can skip some computations. - const bool is_input_all_zeros = - tflite::tensor_utils::IsZeroVector(input_ptr, n_batch * n_input); - const bool is_aux_input_all_zeros = - (aux_input_ptr == nullptr || tflite::tensor_utils::IsZeroVector( - aux_input_ptr, n_batch * n_aux_input)); - if (!use_cifg) { - // Calculate the input gate. (If not CIFG.) - lstm_internal::CalculateLstmGateFloat( - input_ptr, input_to_input_weights_ptr, aux_input_ptr, - aux_input_to_input_weights_ptr, output_state_ptr, - recurrent_to_input_weights_ptr, cell_state_ptr, - cell_to_input_weights_ptr, input_layer_norm_coefficients_ptr, - input_gate_bias_ptr, n_batch, n_input, n_aux_input, n_output, n_cell, - /*activation=*/kTfLiteActSigmoid, input_gate_scratch, - is_input_all_zeros, is_aux_input_all_zeros); - } - // Calculate the forget gate. 
- lstm_internal::CalculateLstmGateFloat( - input_ptr, input_to_forget_weights_ptr, aux_input_ptr, - aux_input_to_forget_weights_ptr, output_state_ptr, - recurrent_to_forget_weights_ptr, cell_state_ptr, - cell_to_forget_weights_ptr, forget_layer_norm_coefficients_ptr, - forget_gate_bias_ptr, n_batch, n_input, n_aux_input, n_output, n_cell, - /*activation=*/kTfLiteActSigmoid, forget_gate_scratch, is_input_all_zeros, - is_aux_input_all_zeros); - // Calculate the cell update gate. - lstm_internal::CalculateLstmGateFloat( - input_ptr, input_to_cell_weights_ptr, aux_input_ptr, - aux_input_to_cell_weights_ptr, output_state_ptr, - recurrent_to_cell_weights_ptr, /*cell_state=*/nullptr, - /*cell_to_gate_weights=*/nullptr, cell_layer_norm_coefficients_ptr, - cell_gate_bias_ptr, n_batch, n_input, n_aux_input, n_output, n_cell, - params->activation, cell_gate_scratch, is_input_all_zeros, - is_aux_input_all_zeros); - // Update the cell state. - lstm_internal::UpdateLstmCellFloat( - n_batch, n_cell, cell_state_ptr, input_gate_scratch, forget_gate_scratch, - cell_gate_scratch, use_cifg, params->cell_clip); - // Calculate output gate. - lstm_internal::CalculateLstmGateFloat( - input_ptr, input_to_output_weights_ptr, aux_input_ptr, - aux_input_to_output_weights_ptr, output_state_ptr, - recurrent_to_output_weights_ptr, cell_state_ptr, - cell_to_output_weights_ptr, output_layer_norm_coefficients_ptr, - output_gate_bias_ptr, n_batch, n_input, n_aux_input, n_output, n_cell, - /*activation=*/kTfLiteActSigmoid, output_gate_scratch, is_input_all_zeros, - is_aux_input_all_zeros); - // Update the output state. - lstm_internal::CalculateLstmOutputFloat( - n_batch, n_cell, n_output, cell_state_ptr, output_gate_scratch, - params->activation, projection_weights_ptr, projection_bias_ptr, - params->proj_clip, output_state_ptr, scratch2); - // Copy output state to the output. Note that the output's rows may not be - // contiguous (output_batch_leading_dim != n_output). - for (int b = 0; b < n_batch; b++) { - std::memcpy(output_ptr + b * output_batch_leading_dim, - output_state_ptr + b * n_output, n_output * sizeof(float)); +void Tanh(int32_t cell_state_scale_power, const RuntimeShape& input_data_shape, + int16_t* input_data, const RuntimeShape& output_data_shape, + int16_t* output_data) { + int32_t tanh_input_left_shift = (15 + cell_state_scale_power) - 3; + int32_t input_multiplier = 0; + if (tanh_input_left_shift < 0) /* handling negative shift value */ + { + tanh_input_left_shift = -tanh_input_left_shift; + input_multiplier = 3; } + reference_integer_ops::Tanh(input_multiplier, tanh_input_left_shift, + input_data_shape, input_data, output_data_shape, + output_data); } -// Fully quantized lstm kernel for 16 bit gate matmul output. -// -// Input tensor of size n_batch * n_input: -// input_ptr -// -// LSTM weights: -// Quantized input weights of size 'n_cell * n_input': -// input_to_input_weight_ptr - optional -// input_to_forget_weight_ptr - optional -// input_to_cell_weight_ptr - optional -// input_to_output_weight_ptr - optional -// -// Quantized recurrent weights of size 'n_cell * n_output': -// recurrent_to_input_weight_ptr - optional -// recurrent_to_forget_weights_ptr -// recurrent_to_cell_weights_ptr -// recurrent_to_input_weights_ptr -// -// Quantized peephole weights of size 'n_cell', representing diagonal matrices. 
-// cell_to_input_weights - optional -// cell_to_cell_weights - optional -// cell_to_output_weights - optional -// -// Quantized projection weights of size 'n_output * n_cell' -// projection_weight_ptr - optional -// -// Weight scales (scalars) for each of the weights above. -// effective_input_to_input_scale_a - optional -// effective_input_to_input_scale_b - optional -// effective_input_to_forget_scale_a -// effective_input_to_forget_scale_b -// effective_input_to_cell_scale_a -// effective_input_to_cell_scale_b -// effective_input_to_output_scale_a -// effective_input_to_output_scale_b -// effective_recurrent_to_input_scale_a - optional -// effective_recurrent_to_input_scale_b - optional -// effective_recurrent_to_forget_scale_a -// effective_recurrent_to_forget_scale_b -// effective_recurrent_to_cell_scale_a -// effective_recurrent_to_cell_scale_b -// effective_recurrent_to_output_scale_a -// effective_recurrent_to_output_scale_b -// effective_proj_scale_a - optional -// effective_proj_scale_b - optional -// -// Gate biases of size 'n_cell': -// input_gate_bias_ptr - optional -// forget_gate_bias_ptr -// cell_gate_bias_ptr -// output_gate_bias_ptr -// -// Layer norm coefficients of size 'n_cell', representing diagonal matrices. -// layer_norm_input_weight_ptr - optional -// layer_norm_forget_weight_ptr - optional -// layer_norm_cell_weight_ptr - optional -// layer_norm_output_weight_ptr - optional -// -// Layer norm scales of size 'n_cell'. -// layer_norm_input_scale_a - optional -// layer_norm_input_scale_b - optional -// layer_norm_forget_scale_a - optional -// layer_norm_forget_scale_b - optional -// layer_norm_cell_scale_a - optional -// layer_norm_cell_scale_b - optional -// layer_norm_output_scale_a - optional -// layer_norm_output_scale_b - optional -// -// Scalar values: -// quantized_cell_clip: quantized clip value for cell. -// quantized_proj_clip: quantized clip value for projection. -// cell_state_scale: the power of two scale for cell state. -// -// Zero points: -// output_state_zp: zero point of output state -// hidden_zp: zero point for hidden state. -// -// Temporary pre-allocated storage for the calculation. Each is of size n_cell * -// n_batch. -// scratch0 -// scratch1 -// scratch2 -// scratch3 -// scratch4 -// scratch5: this scratch buffer is created purely for optimizing the -// MatrixBatchVectorMultiplyAccumulate. -// -// Outputs: -// output_state_ptr - size 'n_batch * n_output' -// cell_state_ptr - size 'n_batch * n_cell' -// output_ptr - size 'n_batch * n_output' -// TODO(b/159947023): scratch0 is not used if (!cifg). Don't allocate then. 
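
[Editor's aside] Each effective_*_scale_a / effective_*_scale_b pair listed above is the fixed-point form of a real rescaling factor: an int32 multiplier (a) and a power-of-two shift (b). Applying it to an integer accumulator looks roughly like the sketch below; production kernels use saturating rounding-doubling arithmetic, so treat this as a simplified illustration that assumes shift < 31.

#include <cstdint>

// Approximate y = acc * (multiplier * 2^shift / 2^31) with integer math only.
// Simplified: round-to-nearest, no saturation, assumes shift < 31.
int32_t ApplyEffectiveScale(int64_t acc, int32_t multiplier, int shift) {
  const int64_t prod = acc * static_cast<int64_t>(multiplier);
  const int total_shift = 31 - shift;
  const int64_t rounding = int64_t{1} << (total_shift - 1);
  return static_cast<int32_t>((prod + rounding) >> total_shift);
}
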
-void LstmStepInteger8x8_16( - const int8_t* input_ptr, const int8_t* input_to_input_weight_ptr, - int32_t effective_input_to_input_scale_a, - int32_t effective_input_to_input_scale_b, - const int8_t* input_to_forget_weight_ptr, - int32_t effective_input_to_forget_scale_a, - int32_t effective_input_to_forget_scale_b, - const int8_t* input_to_cell_weight_ptr, - int32_t effective_input_to_cell_scale_a, - int32_t effective_input_to_cell_scale_b, - const int8_t* input_to_output_weight_ptr, - int32_t effective_input_to_output_scale_a, - int32_t effective_input_to_output_scale_b, - const int8_t* recurrent_to_input_weight_ptr, - int32_t effective_recurrent_to_input_scale_a, - int32_t effective_recurrent_to_input_scale_b, - const int8_t* recurrent_to_forget_weight_ptr, - int32_t effective_recurrent_to_forget_scale_a, - int32_t effective_recurrent_to_forget_scale_b, - const int8_t* recurrent_to_cell_weight_ptr, - int32_t effective_recurrent_to_cell_scale_a, - int32_t effective_recurrent_to_cell_scale_b, - const int8_t* recurrent_to_output_weight_ptr, - int32_t effective_recurrent_to_output_scale_a, - int32_t effective_recurrent_to_output_scale_b, - const int16_t* cell_to_input_weight_ptr, - int32_t effective_cell_to_input_scale_a, - int32_t effective_cell_to_input_scale_b, - const int16_t* cell_to_forget_weight_ptr, - int32_t effective_cell_to_forget_scale_a, - int32_t effective_cell_to_forget_scale_b, - const int16_t* cell_to_output_weight_ptr, - int32_t effective_cell_to_output_scale_a, - int32_t effective_cell_to_output_scale_b, - const int8_t* projection_weight_ptr, int32_t effective_proj_scale_a, - int32_t effective_proj_scale_b, int32_t hidden_zp, - int32_t effective_hidden_scale_a, int32_t effective_hidden_scale_b, - const int16_t* layer_norm_input_weight_ptr, - int32_t layer_norm_input_scale_a, int32_t layer_norm_input_scale_b, - const int16_t* layer_norm_forget_weight_ptr, - int32_t layer_norm_forget_scale_a, int32_t layer_norm_forget_scale_b, - const int16_t* layer_norm_cell_weight_ptr, int32_t layer_norm_cell_scale_a, - int32_t layer_norm_cell_scale_b, - const int16_t* layer_norm_output_weight_ptr, - int32_t layer_norm_output_scale_a, int32_t layer_norm_output_scale_b, - const int32_t* input_gate_bias_ptr, const int32_t* forget_gate_bias_ptr, - const int32_t* cell_gate_bias_ptr, const int32_t* output_gate_bias_ptr, - int16_t quantized_cell_clip, int8_t quantized_proj_clip, - int32_t cell_state_scale, int32_t input_variance_guard, - int32_t forget_variance_guard, int32_t cell_variance_guard, - int32_t output_variance_guard, - const int32_t* input_to_forget_effective_bias, - const int32_t* recurrent_to_forget_effective_bias, - const int32_t* input_to_cell_effective_bias, - const int32_t* recurrent_to_cell_effective_bias, - const int32_t* input_to_output_effective_bias, - const int32_t* recurrent_to_output_effective_bias, - const int32_t* input_to_input_effective_bias, - const int32_t* recurrent_to_input_effective_bias, - const int32_t* projection_effective_bias, int n_batch, int n_cell, - int n_input, int n_output, int8_t* output_state_ptr, - int32_t output_state_zp, int16_t* cell_state_ptr, int8_t* output_ptr, - int16_t* scratch0, int16_t* scratch1, int16_t* scratch2, int16_t* scratch3, - int8_t* scratch4, int32_t* scratch5) { - // Make named scratch buffers for the different gates. 
- int16_t* input_gate_scratch = scratch0; - int16_t* forget_gate_scratch = scratch1; - int16_t* cell_gate_scratch = scratch2; - int16_t* output_gate_scratch = scratch3; +void Tanh(int32_t cell_state_scale_power, const RuntimeShape& input_data_shape, + float* input_data, const RuntimeShape& output_data_shape, + float* output_data) { + reference_ops::Tanh(input_data_shape, input_data, output_data_shape, + output_data); +} - // Since we have already checked that weights are all there or none, we - // can check the existence of only one to the get the condition. - const bool use_cifg = (input_to_input_weight_ptr == nullptr); +// Input and output have the same shape in LSTM +void Mul(const RuntimeShape& shape, const ArithmeticParams& params, + const int16_t* input1_data, const int16_t* input2_data, + int8_t* output_data) { + return reference_integer_ops::MulElementwise( + shape.FlatSize(), params, input1_data, input2_data, output_data); +} - // Check for nullptrs. - TFLITE_DCHECK(input_to_forget_effective_bias); - TFLITE_DCHECK(recurrent_to_forget_effective_bias); - TFLITE_DCHECK(input_to_cell_effective_bias); - TFLITE_DCHECK(recurrent_to_cell_effective_bias); - TFLITE_DCHECK(input_to_output_effective_bias); - TFLITE_DCHECK(recurrent_to_output_effective_bias); - if (!use_cifg) { - TFLITE_DCHECK(input_to_input_effective_bias); - TFLITE_DCHECK(recurrent_to_input_effective_bias); - } - const bool use_projection = (projection_weight_ptr != nullptr); - if (use_projection) { - TFLITE_DCHECK(projection_effective_bias); - } - if (!use_cifg) { - // Calculate the input gate. (If not CIFG.) - lstm_internal::CalculateLstmGateInteger8x8_16( - input_ptr, input_to_input_weight_ptr, input_to_input_effective_bias, - effective_input_to_input_scale_a, effective_input_to_input_scale_b, - output_state_ptr, recurrent_to_input_weight_ptr, - recurrent_to_input_effective_bias, effective_recurrent_to_input_scale_a, - effective_recurrent_to_input_scale_b, cell_state_ptr, - cell_to_input_weight_ptr, effective_cell_to_input_scale_a, - effective_cell_to_input_scale_b, layer_norm_input_weight_ptr, - input_gate_bias_ptr, layer_norm_input_scale_a, layer_norm_input_scale_b, - input_variance_guard, n_batch, n_input, n_output, n_cell, - kTfLiteActSigmoid, input_gate_scratch, scratch5); - } - // Calculate the forget gate. - lstm_internal::CalculateLstmGateInteger8x8_16( - input_ptr, input_to_forget_weight_ptr, input_to_forget_effective_bias, - effective_input_to_forget_scale_a, effective_input_to_forget_scale_b, - output_state_ptr, recurrent_to_forget_weight_ptr, - recurrent_to_forget_effective_bias, effective_recurrent_to_forget_scale_a, - effective_recurrent_to_forget_scale_b, cell_state_ptr, - cell_to_forget_weight_ptr, effective_cell_to_forget_scale_a, - effective_cell_to_forget_scale_b, layer_norm_forget_weight_ptr, - forget_gate_bias_ptr, layer_norm_forget_scale_a, - layer_norm_forget_scale_b, forget_variance_guard, n_batch, n_input, - n_output, n_cell, kTfLiteActSigmoid, forget_gate_scratch, scratch5); - // Calculate the cell update gate. 
- lstm_internal::CalculateLstmGateInteger8x8_16( - input_ptr, input_to_cell_weight_ptr, input_to_cell_effective_bias, - effective_input_to_cell_scale_a, effective_input_to_cell_scale_b, - output_state_ptr, recurrent_to_cell_weight_ptr, - recurrent_to_cell_effective_bias, effective_recurrent_to_cell_scale_a, - effective_recurrent_to_cell_scale_b, cell_state_ptr, - /*cell_to_gate_weights=*/nullptr, /*cell_to_gate_scale_a=*/0, - /*cell_to_gate_scale_b=*/0, layer_norm_cell_weight_ptr, - cell_gate_bias_ptr, layer_norm_cell_scale_a, layer_norm_cell_scale_b, - cell_variance_guard, n_batch, n_input, n_output, n_cell, kTfLiteActTanh, - cell_gate_scratch, scratch5); - // Update the cell state. - lstm_internal::UpdateLstmCellInteger( - n_batch, n_cell, cell_state_ptr, cell_state_scale, input_gate_scratch, - forget_gate_scratch, cell_gate_scratch, use_cifg, quantized_cell_clip); - // Calculate the output gate. - lstm_internal::CalculateLstmGateInteger8x8_16( - input_ptr, input_to_output_weight_ptr, input_to_output_effective_bias, - effective_input_to_output_scale_a, effective_input_to_output_scale_b, - output_state_ptr, recurrent_to_output_weight_ptr, - recurrent_to_output_effective_bias, effective_recurrent_to_output_scale_a, - effective_recurrent_to_output_scale_b, cell_state_ptr, - cell_to_output_weight_ptr, effective_cell_to_output_scale_a, - effective_cell_to_output_scale_b, layer_norm_output_weight_ptr, - output_gate_bias_ptr, layer_norm_output_scale_a, - layer_norm_output_scale_b, output_variance_guard, n_batch, n_input, - n_output, n_cell, kTfLiteActSigmoid, output_gate_scratch, scratch5); - // Update the output state. - lstm_internal::CalculateLstmOutputInteger8x8_16( - n_batch, n_cell, n_output, cell_state_ptr, cell_state_scale, - output_gate_scratch, effective_hidden_scale_a, effective_hidden_scale_b, - hidden_zp, projection_weight_ptr, effective_proj_scale_a, - effective_proj_scale_b, projection_effective_bias, output_state_zp, - quantized_proj_clip, output_state_ptr, scratch0, scratch4, scratch5); - // Copy output state to the output. Note that unlike float or hybrid, output - // is always contiguous. 
- std::memcpy(output_ptr, output_state_ptr, - n_batch * n_output * sizeof(int8_t)); +// Input and output have the same shape in LSTM +void Mul(const RuntimeShape& shape, const ArithmeticParams& params, + const int16_t* input1_data, const int16_t* input2_data, + int16_t* output_data) { + return reference_integer_ops::MulElementwise( + shape.FlatSize(), params, input1_data, input2_data, output_data); } -} // namespace lstm_internal +// Input and output have the same shape in LSTM +void Mul(const RuntimeShape& shape, const ArithmeticParams& params, + const float* input1_data, const float* input2_data, + float* output_data) { + return reference_ops::Mul(params, shape, input1_data, shape, input2_data, + shape, output_data); +} -TfLiteStatus EvalFloatLstm( - const TfLiteEvalTensor* input, - const TfLiteEvalTensor* input_to_input_weights, - const TfLiteEvalTensor* input_to_forget_weights, - const TfLiteEvalTensor* input_to_cell_weights, - const TfLiteEvalTensor* input_to_output_weights, - const TfLiteEvalTensor* recurrent_to_input_weights, - const TfLiteEvalTensor* recurrent_to_forget_weights, - const TfLiteEvalTensor* recurrent_to_cell_weights, - const TfLiteEvalTensor* recurrent_to_output_weights, - const TfLiteEvalTensor* cell_to_input_weights, - const TfLiteEvalTensor* cell_to_forget_weights, - const TfLiteEvalTensor* cell_to_output_weights, - const TfLiteEvalTensor* input_layer_norm_coefficients, - const TfLiteEvalTensor* forget_layer_norm_coefficients, - const TfLiteEvalTensor* cell_layer_norm_coefficients, - const TfLiteEvalTensor* output_layer_norm_coefficients, - const TfLiteEvalTensor* aux_input, - const TfLiteEvalTensor* aux_input_to_input_weights, - const TfLiteEvalTensor* aux_input_to_forget_weights, - const TfLiteEvalTensor* aux_input_to_cell_weights, - const TfLiteEvalTensor* aux_input_to_output_weights, - const TfLiteEvalTensor* input_gate_bias, - const TfLiteEvalTensor* forget_gate_bias, - const TfLiteEvalTensor* cell_gate_bias, - const TfLiteEvalTensor* output_gate_bias, - const TfLiteEvalTensor* projection_weights, - const TfLiteEvalTensor* projection_bias, const TfLiteLSTMParams* params, - bool forward_sequence, bool time_major, int output_offset, - float* scratch_buffer, TfLiteEvalTensor* output_state, - TfLiteEvalTensor* cell_state, TfLiteEvalTensor* output) { - TFLITE_DCHECK(input->dims->size >= 2 && input->dims->size <= 3); - int max_time, n_batch; - if (input->dims->size == 3) { - max_time = (time_major) ? input->dims->data[0] : input->dims->data[1]; - n_batch = (time_major) ? input->dims->data[1] : input->dims->data[0]; - } else { - max_time = 1; - n_batch = input->dims->data[0]; - } - const int n_input = input->dims->data[input->dims->size - 1]; - const int aux_input_size = - (aux_input) ? aux_input->dims->data[aux_input->dims->size - 1] : 0; +void FullyConnected(const FullyConnectedParams& params, + const RuntimeShape& input_shape, const int8_t* input_data, + const RuntimeShape& filter_shape, const int8_t* filter_data, + const RuntimeShape& bias_shape, const int32_t* bias_data, + const RuntimeShape& output_shape, int16_t* output_data) { + return tflite::reference_integer_ops::FullyConnected( + params, input_shape, input_data, filter_shape, filter_data, bias_shape, + bias_data, output_shape, output_data); +} - // n_cell and n_output will be the same size when there is no projection. 
- const int n_cell = input_to_output_weights->dims->data[0]; - const int n_output = recurrent_to_output_weights->dims->data[1]; +void FullyConnected(const FullyConnectedParams& params, + const RuntimeShape& input_shape, const int16_t* input_data, + const RuntimeShape& filter_shape, const int8_t* filter_data, + const RuntimeShape& bias_shape, const int64_t* bias_data, + const RuntimeShape& output_shape, int16_t* output_data) { + return tflite::reference_integer_ops::FullyConnected( + params, input_shape, input_data, filter_shape, filter_data, bias_shape, + bias_data, output_shape, output_data); +} - // Since we have already checked that weights are all there or none, we can - // check the existence of only one to the get the condition. - const bool use_cifg = (input_to_input_weights == nullptr); +void FullyConnected(const FullyConnectedParams& params, + const RuntimeShape& input_shape, const float* input_data, + const RuntimeShape& filter_shape, const float* filter_data, + const RuntimeShape& bias_shape, const float* bias_data, + const RuntimeShape& output_shape, float* output_data) { + return tflite::reference_ops::FullyConnected( + params, input_shape, input_data, filter_shape, filter_data, bias_shape, + bias_data, output_shape, output_data); +} - // Index the scratch buffers pointers to the global scratch buffer. - float* input_gate_scratch = nullptr; - float* cell_gate_scratch = nullptr; - float* forget_gate_scratch = nullptr; - float* output_gate_scratch = nullptr; - if (use_cifg) { - cell_gate_scratch = scratch_buffer; - forget_gate_scratch = scratch_buffer + n_cell * n_batch; - output_gate_scratch = scratch_buffer + 2 * n_cell * n_batch; - } else { - input_gate_scratch = scratch_buffer; - cell_gate_scratch = scratch_buffer + n_cell * n_batch; - forget_gate_scratch = scratch_buffer + 2 * n_cell * n_batch; - output_gate_scratch = scratch_buffer + 3 * n_cell * n_batch; +void Clipping(const int v_size, const CellStateInfo& cell_state_info, + int16_t* vector) { + for (int i = 0; i < v_size; i++) { + vector[i] = + std::max(std::min(cell_state_info.quantized_cell_clip, vector[i]), + static_cast(-cell_state_info.quantized_cell_clip)); } +} - const int output_batch_leading_dim = - output->dims->data[output->dims->size - 1]; - if (time_major) { - // Loop through the sequence. - const int input_step = n_batch * n_input; - const int output_step = n_batch * output_batch_leading_dim; - for (int t = 0; t < max_time; t++) { - // If this is the forward_sequence, step forward, otherwise step - // backwards. - const int t_rel = forward_sequence ? t : max_time - t - 1; - const float* input_ptr = - tflite::micro::GetTensorData(input) + t_rel * input_step; - const float* aux_input_ptr = nullptr; - if (aux_input) { - aux_input_ptr = - tflite::micro::GetTensorData(aux_input) + t_rel * input_step; - } - float* output_ptr = tflite::micro::GetTensorData(output) + - t_rel * output_step + output_offset; - - lstm_internal::LstmStepFloat( - input_ptr, - input_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_input_weights), - input_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_forget_weights), - input_to_cell_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_cell_weights), - input_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_output_weights), - aux_input_ptr, - aux_input_to_input_weights == nullptr - ? 
nullptr - : tflite::micro::GetTensorData(aux_input_to_input_weights), - aux_input_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - aux_input_to_forget_weights), - aux_input_to_cell_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(aux_input_to_cell_weights), - aux_input_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - aux_input_to_output_weights), - recurrent_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(recurrent_to_input_weights), - recurrent_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_forget_weights), - recurrent_to_cell_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(recurrent_to_cell_weights), - recurrent_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_output_weights), - cell_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_to_input_weights), - cell_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_to_forget_weights), - cell_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_to_output_weights), - input_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - input_layer_norm_coefficients), - forget_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - forget_layer_norm_coefficients), - cell_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - cell_layer_norm_coefficients), - output_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - output_layer_norm_coefficients), - input_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_gate_bias), - forget_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(forget_gate_bias), - cell_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_gate_bias), - output_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(output_gate_bias), - projection_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(projection_weights), - projection_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(projection_bias), - params, n_batch, n_cell, n_input, aux_input_size, n_output, - output_batch_leading_dim, - tflite::micro::GetTensorData(output_state), - tflite::micro::GetTensorData(cell_state), input_gate_scratch, - forget_gate_scratch, cell_gate_scratch, output_gate_scratch, - output_ptr); - } - } else { - for (int b = 0; b < n_batch; b++) { - const int input_step = n_input; - const int output_step = output_batch_leading_dim; - for (int t = 0; t < max_time; t++) { - // If this is the forward_sequence, step forward, otherwise step - // backwards. - const int t_rel = forward_sequence ? t : max_time - t - 1; - const int time_offset = b * max_time + t_rel; - const float* input_ptr = tflite::micro::GetTensorData(input) + - time_offset * input_step; - const float* aux_input_ptr = nullptr; - if (aux_input) { - aux_input_ptr = tflite::micro::GetTensorData(aux_input) + - time_offset * input_step; - } - float* output_ptr = tflite::micro::GetTensorData(output) + - time_offset * output_step + output_offset; - - // Offset the {output,cell}_state pointers to the right batch. 
- float* output_state_ptr = - tflite::micro::GetTensorData(output_state) + - b * output_batch_leading_dim; - float* cell_state_ptr = - tflite::micro::GetTensorData(cell_state) + b * n_cell; - // Offset the scratch pointers to the right batch. - float* input_gate_scratch_ptr = - input_gate_scratch ? input_gate_scratch + b * n_cell : nullptr; - float* forget_gate_scratch_ptr = forget_gate_scratch + b * n_cell; - float* cell_gate_scratch_ptr = cell_gate_scratch + b * n_cell; - float* output_gate_scratch_ptr = output_gate_scratch + b * n_cell; - - lstm_internal::LstmStepFloat( - input_ptr, - input_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_input_weights), - input_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_forget_weights), - input_to_cell_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_cell_weights), - input_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_output_weights), - aux_input_ptr, - aux_input_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - aux_input_to_input_weights), - aux_input_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - aux_input_to_forget_weights), - aux_input_to_cell_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - aux_input_to_cell_weights), - aux_input_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - aux_input_to_output_weights), - recurrent_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_input_weights), - recurrent_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_forget_weights), - recurrent_to_cell_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_cell_weights), - recurrent_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_output_weights), - cell_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_to_input_weights), - cell_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_to_forget_weights), - cell_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_to_output_weights), - input_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - input_layer_norm_coefficients), - forget_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - forget_layer_norm_coefficients), - cell_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - cell_layer_norm_coefficients), - output_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - output_layer_norm_coefficients), - input_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_gate_bias), - forget_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(forget_gate_bias), - cell_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_gate_bias), - output_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(output_gate_bias), - projection_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(projection_weights), - projection_bias == nullptr - ? 
nullptr - : tflite::micro::GetTensorData(projection_bias), - params, - /*n_batch=*/1, n_cell, n_input, aux_input_size, n_output, - output_batch_leading_dim, output_state_ptr, cell_state_ptr, - input_gate_scratch_ptr, forget_gate_scratch_ptr, - cell_gate_scratch_ptr, output_gate_scratch_ptr, output_ptr); - } - } +void Clipping(const int v_size, const CellStateInfo& cell_state_info, + float* vector) { + for (int i = 0; i < v_size; i++) { + vector[i] = std::max(std::min(cell_state_info.cell_clip, vector[i]), + -cell_state_info.cell_clip); } - return kTfLiteOk; } -TfLiteStatus EvalInteger8x8_16Lstm( - const TfLiteEvalTensor* input, - const TfLiteEvalTensor* input_to_input_weights, - const TfLiteEvalTensor* input_to_forget_weights, - const TfLiteEvalTensor* input_to_cell_weights, - const TfLiteEvalTensor* input_to_output_weights, - const TfLiteEvalTensor* recurrent_to_input_weights, - const TfLiteEvalTensor* recurrent_to_forget_weights, - const TfLiteEvalTensor* recurrent_to_cell_weights, - const TfLiteEvalTensor* recurrent_to_output_weights, - const TfLiteEvalTensor* cell_to_input_weights, - const TfLiteEvalTensor* cell_to_forget_weights, - const TfLiteEvalTensor* cell_to_output_weights, - const TfLiteEvalTensor* input_layer_norm_coefficients, - const TfLiteEvalTensor* forget_layer_norm_coefficients, - const TfLiteEvalTensor* cell_layer_norm_coefficients, - const TfLiteEvalTensor* output_layer_norm_coefficients, - const TfLiteEvalTensor* input_gate_bias, - const TfLiteEvalTensor* forget_gate_bias, - const TfLiteEvalTensor* cell_gate_bias, - const TfLiteEvalTensor* output_gate_bias, - const TfLiteEvalTensor* projection_weights, - const TfLiteEvalTensor* projection_bias, const TfLiteLSTMParams* params, - bool forward_sequence, bool time_major, - const IntegerLstmParameter* integer_lstm_param, int32_t output_state_zp, - TfLiteEvalTensor* output_state, TfLiteEvalTensor* cell_state, - TfLiteEvalTensor* output, int16_t* scratch0, int16_t* scratch1, - int16_t* scratch2, int16_t* scratch3, int8_t* scratch4, int32_t* scratch5) { - TFLITE_DCHECK(input->dims->size >= 2 && input->dims->size <= 3); - const int n_input = input->dims->data[input->dims->size - 1]; - int max_time, n_batch; - if (input->dims->size == 2) { - max_time = 1; - n_batch = input->dims->data[0]; - } else { - max_time = (time_major) ? input->dims->data[0] : input->dims->data[1]; - n_batch = (time_major) ? input->dims->data[1] : input->dims->data[0]; +// Increment the data offset so the sigle time step invocation call can access +// the corresponding input/output tensor data at the time step +void LstmStepManager::UpdateTime() { + current_time_ += 1; + TFLITE_DCHECK_LE(current_time_, size_info_.time_steps); + // default as one batch per inference + int input_step = size_info_.input_dimension; + int output_step = size_info_.state_dimension; + // time major: batch inference + if (size_info_.time_major) { + input_step = input_step * size_info_.batch_size; + output_step = output_step * size_info_.batch_size; } - // n_cell and n_output will be the same size when there is no projection. - const int n_cell = input_to_output_weights->dims->data[0]; - const int n_output = recurrent_to_output_weights->dims->data[1]; - - // Get params for time/batch/sequence. 
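The float Clipping overload above clamps the cell state directly to +/-cell_clip; the int16 overload clamps to +/-quantized_cell_clip, i.e. the clip expressed in cell-state quantization units. Assuming the quantized clip is derived by dividing the float clip by the cell-state scale and saturating to int16 (see the CellStateInfo helpers elsewhere in this patch), a standalone sketch with illustrative numbers:

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Sketch: derive a quantized clip threshold from the float cell_clip and the
// cell-state scale, then clamp a few int16 cell-state values the same way
// the int16 Clipping() overload does. Numbers are illustrative only.
int main() {
  const float cell_clip = 3.0f;                // from TfLiteLSTMParams
  const float cell_state_scale = 1.0f / 2048;  // e.g. 2^-11 cell state
  const float raw = cell_clip / cell_state_scale;
  const int16_t quantized_cell_clip = static_cast<int16_t>(
      std::min(std::max(raw, -32768.0f), 32767.0f));  // 6144 here

  int16_t cell_state[3] = {8000, -7000, 1234};
  for (int i = 0; i < 3; ++i) {
    cell_state[i] = std::max(std::min(quantized_cell_clip, cell_state[i]),
                             static_cast<int16_t>(-quantized_cell_clip));
  }
  std::printf("%d %d %d\n", cell_state[0], cell_state[1], cell_state[2]);
  // Prints: 6144 -6144 1234
  return 0;
}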
- const int output_batch_leading_dim = - output->dims->data[output->dims->size - 1]; - - if (time_major) { - const int input_step = n_batch * n_input; - const int output_step = n_batch * output_batch_leading_dim; - for (int t = 0; t < max_time; t++) { - const int t_rel = t; - int8_t* output_ptr = - tflite::micro::GetTensorData(output) + t_rel * output_step; - const int8_t* input_ptr = - tflite::micro::GetTensorData(input) + t_rel * input_step; - lstm_internal::LstmStepInteger8x8_16( - input_ptr, - input_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_input_weights), - integer_lstm_param->effective_input_to_input_scale_a, - integer_lstm_param->effective_input_to_input_scale_b, - input_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_forget_weights), - integer_lstm_param->effective_input_to_forget_scale_a, - integer_lstm_param->effective_input_to_forget_scale_b, - input_to_cell_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_cell_weights), - integer_lstm_param->effective_input_to_cell_scale_a, - integer_lstm_param->effective_input_to_cell_scale_b, - input_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_output_weights), - integer_lstm_param->effective_input_to_output_scale_a, - integer_lstm_param->effective_input_to_output_scale_b, - recurrent_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_input_weights), - integer_lstm_param->effective_recurrent_to_input_scale_a, - integer_lstm_param->effective_recurrent_to_input_scale_b, - recurrent_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_forget_weights), - integer_lstm_param->effective_recurrent_to_forget_scale_a, - integer_lstm_param->effective_recurrent_to_forget_scale_b, - recurrent_to_cell_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(recurrent_to_cell_weights), - integer_lstm_param->effective_recurrent_to_cell_scale_a, - integer_lstm_param->effective_recurrent_to_cell_scale_b, - recurrent_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_output_weights), - integer_lstm_param->effective_recurrent_to_output_scale_a, - integer_lstm_param->effective_recurrent_to_output_scale_b, - cell_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_to_input_weights), - integer_lstm_param->effective_cell_to_input_scale_a, - integer_lstm_param->effective_cell_to_input_scale_b, - cell_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_to_forget_weights), - integer_lstm_param->effective_cell_to_forget_scale_a, - integer_lstm_param->effective_cell_to_forget_scale_b, - cell_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_to_output_weights), - integer_lstm_param->effective_cell_to_output_scale_a, - integer_lstm_param->effective_cell_to_output_scale_b, - projection_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(projection_weights), - integer_lstm_param->effective_proj_scale_a, - integer_lstm_param->effective_proj_scale_b, - integer_lstm_param->hidden_zp, - integer_lstm_param->effective_hidden_scale_a, - integer_lstm_param->effective_hidden_scale_b, - input_layer_norm_coefficients == nullptr - ? 
nullptr - : tflite::micro::GetTensorData( - input_layer_norm_coefficients), - integer_lstm_param->layer_norm_input_scale_a, - integer_lstm_param->layer_norm_input_scale_b, - forget_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - forget_layer_norm_coefficients), - integer_lstm_param->layer_norm_forget_scale_a, - integer_lstm_param->layer_norm_forget_scale_b, - cell_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - cell_layer_norm_coefficients), - integer_lstm_param->layer_norm_cell_scale_a, - integer_lstm_param->layer_norm_cell_scale_b, - output_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - output_layer_norm_coefficients), - integer_lstm_param->layer_norm_output_scale_a, - integer_lstm_param->layer_norm_output_scale_b, - input_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_gate_bias), - forget_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(forget_gate_bias), - cell_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_gate_bias), - output_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(output_gate_bias), - integer_lstm_param->quantized_cell_clip, - integer_lstm_param->quantized_proj_clip, - integer_lstm_param->cell_scale, - integer_lstm_param->input_variance_guard, - integer_lstm_param->forget_variance_guard, - integer_lstm_param->cell_variance_guard, - integer_lstm_param->output_variance_guard, - integer_lstm_param->input_to_forget_effective_bias, - integer_lstm_param->recurrent_to_forget_effective_bias, - integer_lstm_param->input_to_cell_effective_bias, - integer_lstm_param->recurrent_to_cell_effective_bias, - integer_lstm_param->input_to_output_effective_bias, - integer_lstm_param->recurrent_to_output_effective_bias, - integer_lstm_param->input_to_input_effective_bias, - integer_lstm_param->recurrent_to_input_effective_bias, - integer_lstm_param->projection_effective_bias, n_batch, n_cell, - n_input, n_output, tflite::micro::GetTensorData(output_state), - output_state_zp, tflite::micro::GetTensorData(cell_state), - output_ptr, scratch0, scratch1, scratch2, scratch3, scratch4, - scratch5); - } - } else { - for (int b = 0; b < n_batch; b++) { - const int input_step = n_input; - const int output_step = output_batch_leading_dim; - for (int t = 0; t < max_time; t++) { - // If this is the forward_sequence, step forward, otherwise step - // backwards. - const int t_rel = forward_sequence ? t : max_time - t - 1; - const int time_offset = b * max_time + t_rel; - const int8_t* input_ptr = tflite::micro::GetTensorData(input) + - time_offset * input_step; - int8_t* output_ptr = tflite::micro::GetTensorData(output) + - time_offset * output_step; + input_offset_ += input_step; + output_offset_ += output_step; +} - // Offset the {output,cell}_state pointers to the right batch. 
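The offset bookkeeping done by LstmStepManager::UpdateTime above (and by the companion UpdateBatch shown further below) can be traced with a small standalone mirror of the same arithmetic. Sizes below are made up (B=2, T=3, I=4, S=5); the point is that for batch-major input the input/output offsets keep advancing across both loops while the hidden/cell-state offsets only advance when the batch changes.

#include <cstdio>

// Standalone mirror of the LstmStepManager offset arithmetic for batch-major
// input. For time-major input the per-step increments would instead be
// B*I and B*S and all batches are processed together, so the state offsets
// never move.
struct SizeInfo {
  bool time_major;
  int batch_size, time_steps, input_dimension, state_dimension;
};

int main() {
  const SizeInfo info{/*time_major=*/false, 2, 3, 4, 5};
  int input_offset = 0, output_offset = 0, state_offset = 0;
  for (int b = 0; b < info.batch_size; ++b) {
    for (int t = 0; t < info.time_steps; ++t) {
      std::printf("b=%d t=%d input=%d output=%d state=%d\n", b, t,
                  input_offset, output_offset, state_offset);
      input_offset += info.input_dimension;   // UpdateTime, non-time-major
      output_offset += info.state_dimension;  // output tensor is [B, T, S]
    }
    state_offset += info.state_dimension;  // UpdateBatch (batch-major only)
  }
  return 0;
}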
- int8_t* output_state_ptr = - tflite::micro::GetTensorData(output_state) + - b * output_batch_leading_dim; - int16_t* cell_state_ptr = - tflite::micro::GetTensorData(cell_state) + b * n_cell; +// Increment the data offset so the sigle time step invocation call can access +// the corresponding hidden/cell state tensor data at the time step (for single +// batch inference only) +void LstmStepManager::UpdateBatch() { + current_batch_ += 1; + TFLITE_DCHECK_LE(current_batch_, size_info_.batch_size); + // batch inference for time major: no action needed + if (size_info_.time_major) { + return; + } + // otherwise: singe batch inference, go to the next batch + hidden_state_offset_ += size_info_.state_dimension; + cell_state_offset_ += size_info_.state_dimension; +} - lstm_internal::LstmStepInteger8x8_16( - input_ptr, - input_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_input_weights), - integer_lstm_param->effective_input_to_input_scale_a, - integer_lstm_param->effective_input_to_input_scale_b, - input_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_forget_weights), - integer_lstm_param->effective_input_to_forget_scale_a, - integer_lstm_param->effective_input_to_forget_scale_b, - input_to_cell_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_cell_weights), - integer_lstm_param->effective_input_to_cell_scale_a, - integer_lstm_param->effective_input_to_cell_scale_b, - input_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_to_output_weights), - integer_lstm_param->effective_input_to_output_scale_a, - integer_lstm_param->effective_input_to_output_scale_b, - recurrent_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_input_weights), - integer_lstm_param->effective_recurrent_to_input_scale_a, - integer_lstm_param->effective_recurrent_to_input_scale_b, - recurrent_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_forget_weights), - integer_lstm_param->effective_recurrent_to_forget_scale_a, - integer_lstm_param->effective_recurrent_to_forget_scale_b, - recurrent_to_cell_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_cell_weights), - integer_lstm_param->effective_recurrent_to_cell_scale_a, - integer_lstm_param->effective_recurrent_to_cell_scale_b, - recurrent_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData( - recurrent_to_output_weights), - integer_lstm_param->effective_recurrent_to_output_scale_a, - integer_lstm_param->effective_recurrent_to_output_scale_b, - cell_to_input_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_to_input_weights), - integer_lstm_param->effective_cell_to_input_scale_a, - integer_lstm_param->effective_cell_to_input_scale_b, - cell_to_forget_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_to_forget_weights), - integer_lstm_param->effective_cell_to_forget_scale_a, - integer_lstm_param->effective_cell_to_forget_scale_b, - cell_to_output_weights == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_to_output_weights), - integer_lstm_param->effective_cell_to_output_scale_a, - integer_lstm_param->effective_cell_to_output_scale_b, - projection_weights == nullptr - ? 
nullptr - : tflite::micro::GetTensorData(projection_weights), - integer_lstm_param->effective_proj_scale_a, - integer_lstm_param->effective_proj_scale_b, - integer_lstm_param->hidden_zp, - integer_lstm_param->effective_hidden_scale_a, - integer_lstm_param->effective_hidden_scale_b, - input_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - input_layer_norm_coefficients), - integer_lstm_param->layer_norm_input_scale_a, - integer_lstm_param->layer_norm_input_scale_b, - forget_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - forget_layer_norm_coefficients), - integer_lstm_param->layer_norm_forget_scale_a, - integer_lstm_param->layer_norm_forget_scale_b, - cell_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - cell_layer_norm_coefficients), - integer_lstm_param->layer_norm_cell_scale_a, - integer_lstm_param->layer_norm_cell_scale_b, - output_layer_norm_coefficients == nullptr - ? nullptr - : tflite::micro::GetTensorData( - output_layer_norm_coefficients), - integer_lstm_param->layer_norm_output_scale_a, - integer_lstm_param->layer_norm_output_scale_b, - input_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(input_gate_bias), - forget_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(forget_gate_bias), - cell_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(cell_gate_bias), - output_gate_bias == nullptr - ? nullptr - : tflite::micro::GetTensorData(output_gate_bias), - integer_lstm_param->quantized_cell_clip, - integer_lstm_param->quantized_proj_clip, - integer_lstm_param->cell_scale, - integer_lstm_param->input_variance_guard, - integer_lstm_param->forget_variance_guard, - integer_lstm_param->cell_variance_guard, - integer_lstm_param->output_variance_guard, - integer_lstm_param->input_to_forget_effective_bias, - integer_lstm_param->recurrent_to_forget_effective_bias, - integer_lstm_param->input_to_cell_effective_bias, - integer_lstm_param->recurrent_to_cell_effective_bias, - integer_lstm_param->input_to_output_effective_bias, - integer_lstm_param->recurrent_to_output_effective_bias, - integer_lstm_param->input_to_input_effective_bias, - integer_lstm_param->recurrent_to_input_effective_bias, - integer_lstm_param->projection_effective_bias, /*n_batch=*/1, - n_cell, n_input, n_output, output_state_ptr, output_state_zp, - cell_state_ptr, output_ptr, scratch0, scratch1, scratch2, scratch3, - scratch4, scratch5); - } - } +// Input shape for each single time LSTM invocation. +// Multi-batch for time_major input +RuntimeShape LstmStepManager::InputShape() const { + int batch_size = 1; + if (size_info_.time_major) { + batch_size = size_info_.batch_size; } + const int dims[2] = {batch_size, size_info_.input_dimension}; + const int32_t* dims_data = reinterpret_cast(dims); + return RuntimeShape(2, dims_data); +} - return kTfLiteOk; +// State shape (both hidden and cell) for each single time LSTM invocation. 
+// Multi-batch for time_major input +RuntimeShape LstmStepManager::StateShape() const { + int batch_size = 1; + if (size_info_.time_major) { + batch_size = size_info_.batch_size; + } + const int dims[2] = {batch_size, size_info_.state_dimension}; + const int32_t* dims_data = reinterpret_cast(dims); + return RuntimeShape(2, dims_data); } -} // namespace tflite \ No newline at end of file +} // namespace lstm_internal +} // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval.h index 7794adb5a..62bc6354e 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval.h @@ -1,4 +1,4 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -12,304 +12,530 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_H_ -#define TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_H_ +// Functions to perform integer evaulation for standard LSTM (e.g., defined in +// the keras lstm layer, no peephole etc.). Currently used by the 16 bits +// activation case only + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_GENERAL_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_GENERAL_H_ +#include #include -#include #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/lstm_shared.h" +#include "tensorflow/lite/micro/micro_log.h" namespace tflite { + +// Interface to access all the TempTfLiteTensors of the LSTM kernel during the +// preparation phase. Can only be constructed through the constructor to avoid +// memory leakage. All TempTfLiteTensors will be deallocated through the +// destructor. +class LstmTensors { + public: + LstmTensors(const LstmTensors& other) = delete; + LstmTensors& operator=(const LstmTensors& other) = delete; + + LstmTensors(TfLiteContext* context, TfLiteNode* node); + ~LstmTensors(); + + // Verify the LSTM internal tensor properties (e.g., type checks) + // Input/output/states/fc weights tensors are required for kernel evaulation. + // The state tensors should be variables. Variants of the standard LSTM + // are not supported here, therefore their corresponding tensors should be + // invalid + TfLiteStatus ValidateTensorStatus(TfLiteContext* context) const; + + // Internal tensors. 
see lstm_shared.h for tensor names + const TfLiteTensor* GetInternalTensor(const int tensor_index) const { + return internal_tensors_[tensor_index]; + } + + const TfLiteTensor* HiddenStateTensor() const { + return internal_tensors_[kLstmOutputStateTensor]; + } + const TfLiteTensor* CellStateTensor() const { + return internal_tensors_[kLstmCellStateTensor]; + } + const TfLiteTensor* OutputTensor() const { return output_tensor_; } + + private: + // see lstm_shared.h for tensor names + MicroContext* micro_context_; + TfLiteTensor* internal_tensors_[24]; + TfLiteTensor* output_tensor_; +}; + +// Deduce the size information (Batch (B), Time Steps (T), Input dimension (I), +// State dimension (S)) that defines the LSTM using the input and hidden state +// tensor +LstmSizeInfo CreateLstmSizeInfo( + const bool time_major, const TfLiteIntArray* input_tensor_shape, + const TfLiteIntArray* hidden_state_tensor_shape); + +TfLiteStatus ValidateWeightTensorSize(TfLiteContext* context, + const TfLiteTensor* tensor, int dim1_size, + int dim2_size); + +TfLiteStatus ValidateBiasTensorSize(TfLiteContext* context, + const TfLiteTensor* tensor, int size); + +// Go through every tensors and make sure their shape match the kernel +// configuration +TfLiteStatus ValidateTensorSize(TfLiteContext* context, + const LstmTensors& tensors, + const LstmSizeInfo& size_info); + +// Wrapper function to create gate parameters for the four internal LSTM gates +TfLiteStatus CreateGateParams( + TfLiteContext* context, + /*Input tensors*/ + const TfLiteTensor* input, const TfLiteTensor* input_weight, + const TfLiteTensor* input_bias, + /*Hidden state tensors*/ + const TfLiteTensor* hidden_state, const TfLiteTensor* hidden_state_weight, + const TfLiteTensor* hidden_state_bias, + /*Scale of the fc output (input to non-linear activation)*/ + const float nonlinear_activation_input_scale, const TfLiteType cell_type, + const tflite::GateParameters& gate_params); + +// Create parameters for element wise multiplication that happens in a) cell +// state update ; b) hidden state update +// Note that all the output of gates are symmetrically quantized so only scales +// are required for input. However, during the hidden state update phase, the +// output is the updated hidden state, which is asymmetrically quantized. 
Thus +// output may require zero point +tflite::ArithmeticParams CreateInterGateMulParams(const float input1_scale, + const float input2_scale, + const float output_scale, + const TfLiteType output_type, + const int output_zp = 0); + +// Create the additional information about the cell state, which include: +// cell_state_scale_power: used in integer nonlinear function (e.g., tanh) +// quantized_cell_clip: quantized cell clip range +CellStateInfo CreateLstmCellStateInfo(const float cell_state_scale, + const float cell_clip); + +CellStateInfo CreateLstmCellStateInfoFloat(const float cell_clip); +tflite::FullyConnectedParams CreateFCParamsFloat(); + +tflite::GateParameters CreateGateParamsFloat(); + +tflite::ArithmeticParams CreateInterGateMulParamsFloat(); + +TfLiteStatus PrepareGateParametersFloat(TfLiteContext* context, + const LstmTensors& lstm_tensors, + OpDataLSTM* op_data_lstm); + +TfLiteStatus PrepareGateParametersInteger(TfLiteContext* context, + const LstmTensors& lstm_tensors, + OpDataLSTM* op_data_lstm); + +LSTMKernelContents CreateLSTMKernelContent(TfLiteContext* context, + TfLiteNode* node); + +template +LSTMBuffers CreateLSTMBuffers(TfLiteContext* context, + const int* buffer_indices) { + LSTMBuffers buffers; + buffers.buffer0 = reinterpret_cast( + context->GetScratchBuffer(context, buffer_indices[0])); + buffers.buffer1 = reinterpret_cast( + context->GetScratchBuffer(context, buffer_indices[1])); + buffers.buffer2 = reinterpret_cast( + context->GetScratchBuffer(context, buffer_indices[2])); + buffers.buffer3 = reinterpret_cast( + context->GetScratchBuffer(context, buffer_indices[3])); + return buffers; +} + // Since LSTM includes multiple intermediate stages, introducing the internal // namespace to expose them for testing namespace lstm_internal { -void CalculateLstmGateFloat( - const float* input, const float* input_to_gate_weights, - const float* aux_input, const float* aux_input_to_gate_weights, - const float* output_state, const float* recurrent_to_gate_weights, - const float* cell_state, const float* cell_to_gate_weights, - const float* layer_norm_coefficients, const float* gate_bias, - const int n_batch, const int n_input, const int n_aux_input, - const int n_output, const int n_cell, - const TfLiteFusedActivation activation, float* gate, - const bool is_input_all_zeros, const bool is_aux_input_all_zeros); - -void UpdateLstmCellFloat(int n_batch, int n_cell, float* cell_state, - const float* input_gate, float* forget_gate, - const float* cell_gate, bool use_cifg, float clip); - -void CalculateLstmOutputFloat(int n_batch, int n_cell, int n_output, - const float* cell_state, const float* output_gate, - TfLiteFusedActivation activation, - const float* projection_weights, - const float* projection_bias, - const float proj_clip, float* output_state, - float* scratch); - -void CalculateLstmGateInteger8x8_16( - // Input and weights - const int8_t* input, const int8_t* input_to_gate_weights, - const int32_t* input_to_gate_bias, const int32_t input_to_gate_scale_a, - const int32_t input_to_gate_scale_b, - // Output state and weights - const int8_t* output_state, const int8_t* recurrent_to_gate_weights, - const int32_t* recurrent_to_gate_bias, - const int32_t recurrent_to_gate_scale_a, - const int32_t recurrent_to_gate_scale_b, - // Cell state and weights - const int16_t* cell_state, const int16_t* cell_to_gate_weights, - const int32_t cell_to_gate_scale_a, const int32_t cell_to_gate_scale_b, - // Layer normalization parameters (layer norm LSTM) - const int16_t* 
layer_norm_coefficients, const int32_t* layer_norm_bias, - const int32_t layer_norm_input_scale_a, - const int32_t layer_norm_input_scale_b, - const int32_t layer_norm_variance_guard, - // Array sizes - const int n_batch, const int n_input, const int n_output, const int n_cell, - const TfLiteFusedActivation activation, + +void Sigmoid(const RuntimeShape& data_shape, int16_t* data); + +void Sigmoid(const RuntimeShape& data_shape, float* data); + +void Tanh(int32_t cell_state_scale_power, const RuntimeShape& input_data_shape, + int16_t* input_data, const RuntimeShape& output_data_shape, + int16_t* output_data); + +void Tanh(int32_t cell_state_scale_power, const RuntimeShape& input_data_shape, + float* input_data, const RuntimeShape& output_data_shape, + float* output_data); + +void Mul(const RuntimeShape& shape, const ArithmeticParams& params, + const int16_t* input1_data, const int16_t* input2_data, + int8_t* output_data); + +void Mul(const RuntimeShape& shape, const ArithmeticParams& params, + const int16_t* input1_data, const int16_t* input2_data, + int16_t* output_data); + +void Mul(const RuntimeShape& shape, const ArithmeticParams& params, + const float* input1_data, const float* input2_data, + float* output_data); + +void FullyConnected(const FullyConnectedParams& params, + const RuntimeShape& input_shape, const int8_t* input_data, + const RuntimeShape& filter_shape, const int8_t* filter_data, + const RuntimeShape& bias_shape, const int32_t* bias_data, + const RuntimeShape& output_shape, int16_t* output_data); + +void FullyConnected(const FullyConnectedParams& params, + const RuntimeShape& input_shape, const int16_t* input_data, + const RuntimeShape& filter_shape, const int8_t* filter_data, + const RuntimeShape& bias_shape, const int64_t* bias_data, + const RuntimeShape& output_shape, int16_t* output_data); + +void FullyConnected(const FullyConnectedParams& params, + const RuntimeShape& input_shape, const float* input_data, + const RuntimeShape& filter_shape, const float* filter_data, + const RuntimeShape& bias_shape, const float* bias_data, + const RuntimeShape& output_shape, float* output_data); + +void AddElementWise(const int16_t* input_1, const int16_t* input_2, int n_batch, + int n_input, int16_t* output); + +void AddElementWise(const float* input_1, const float* input_2, int n_batch, + int n_input, float* output); + +void Clipping(const int v_size, const CellStateInfo& cell_state_info, + int16_t* vector); + +void Clipping(const int v_size, const CellStateInfo& cell_state_info, + float* vector); + +// Manages the slice position (offset), slice length (sliced tensor shape), +// and update rules for input/output/hidden state/cell state tensors at each +// time step. +class LstmStepManager { + public: + LstmStepManager() = delete; + // Does not take any ownership, and all pointers must refer to valid objects + // that outlive the one constructed. 
+ explicit LstmStepManager(const LstmSizeInfo* size_info) + : size_info_(*size_info) {} + + void UpdateTime(); + void UpdateBatch(); + + void ResetTime() { current_time_ = 0; } + RuntimeShape InputShape() const; + RuntimeShape StateShape() const; + + int InputOffset() const { return input_offset_; } + int OutputOffset() const { return output_offset_; } + int HiddenStateOffset() const { return hidden_state_offset_; } + int CellStateOffset() const { return cell_state_offset_; } + + private: + int current_time_ = 0; + int current_batch_ = 0; + int input_offset_ = 0; + int output_offset_ = 0; + int hidden_state_offset_ = 0; + int cell_state_offset_ = 0; + // Sizeinfo is from LstmOpData, which reside in the memory arena + // (guarante to outlast LSTMStepManager, which reside in stack) + const LstmSizeInfo& size_info_; +}; + +// Calculates a single LSTM gate. +// Implements the following formula: +// gate = activate(FC(input) + FC(recurrent)) +// Activation is sigmoid except for the "cell" gate (configurable, usually tanh) +template +void CalculateLstmGate( + const LstmStepManager& step_info, const GateParameters& gate_params, + // Input FC + const TfLiteEvalTensor* input, const TfLiteEvalTensor* input_weight, + const TfLiteEvalTensor* input_bias, + // Recurrent FC + const TfLiteEvalTensor* recurrent, const TfLiteEvalTensor* recurrent_weight, + const TfLiteEvalTensor* recurrent_bias, // Output - int16_t* gate, - // Parameters for performance optimizations + CellType* gate_output, // Scratch arrays - int32_t* scratch5); - -void UpdateLstmCellInteger(int n_batch, int n_cell, int16_t* cell_state, - int32_t cell_state_scale, const int16_t* input_gate, - int16_t* forget_gate, const int16_t* cell_gate, - bool use_cifg, int16_t clip); - -void CalculateLstmOutputInteger8x8_16( - int n_batch, int n_cell, int n_output, int16_t* cell_state, - int32_t cell_state_scale, const int16_t* output_gate, - int32_t hidden_scale_a, int32_t hidden_scale_b, int32_t hidden_zp, - const int8_t* projection_weights, int32_t proj_scale_a, - int32_t proj_scale_b, const int32_t* projection_bias, - int32_t output_state_zp, int8_t quantized_proj_clip, int8_t* output_state, - int16_t* scratch0, int8_t* scratch1, int32_t* scratch2); - -void LstmStepFloat( - const float* input_ptr, const float* input_to_input_weights_ptr, - const float* input_to_forget_weights_ptr, - const float* input_to_cell_weights_ptr, - const float* input_to_output_weights_ptr, const float* aux_input_ptr, - const float* aux_input_to_input_weights_ptr, - const float* aux_input_to_forget_weights_ptr, - const float* aux_input_to_cell_weights_ptr, - const float* aux_input_to_output_weights_ptr, - const float* recurrent_to_input_weights_ptr, - const float* recurrent_to_forget_weights_ptr, - const float* recurrent_to_cell_weights_ptr, - const float* recurrent_to_output_weights_ptr, - const float* cell_to_input_weights_ptr, - const float* cell_to_forget_weights_ptr, - const float* cell_to_output_weights_ptr, - const float* input_layer_norm_coefficients_ptr, - const float* forget_layer_norm_coefficients_ptr, - const float* cell_layer_norm_coefficients_ptr, - const float* output_layer_norm_coefficients_ptr, - const float* input_gate_bias_ptr, const float* forget_gate_bias_ptr, - const float* cell_gate_bias_ptr, const float* output_gate_bias_ptr, - const float* projection_weights_ptr, const float* projection_bias_ptr, - const TfLiteLSTMParams* params, int n_batch, int n_cell, int n_input, - int n_aux_input, int n_output, int output_batch_leading_dim, - float* 
output_state_ptr, float* cell_state_ptr, float* scratch0, - float* scratch1, float* scratch2, float* scratch3, float* output_ptr); - -void LstmStepInteger8x8_16( - const int8_t* input_ptr, const int8_t* input_to_input_weight_ptr, - int32_t effective_input_to_input_scale_a, - int32_t effective_input_to_input_scale_b, - const int8_t* input_to_forget_weight_ptr, - int32_t effective_input_to_forget_scale_a, - int32_t effective_input_to_forget_scale_b, - const int8_t* input_to_cell_weight_ptr, - int32_t effective_input_to_cell_scale_a, - int32_t effective_input_to_cell_scale_b, - const int8_t* input_to_output_weight_ptr, - int32_t effective_input_to_output_scale_a, - int32_t effective_input_to_output_scale_b, - const int8_t* recurrent_to_input_weight_ptr, - int32_t effective_recurrent_to_input_scale_a, - int32_t effective_recurrent_to_input_scale_b, - const int8_t* recurrent_to_forget_weight_ptr, - int32_t effective_recurrent_to_forget_scale_a, - int32_t effective_recurrent_to_forget_scale_b, - const int8_t* recurrent_to_cell_weight_ptr, - int32_t effective_recurrent_to_cell_scale_a, - int32_t effective_recurrent_to_cell_scale_b, - const int8_t* recurrent_to_output_weight_ptr, - int32_t effective_recurrent_to_output_scale_a, - int32_t effective_recurrent_to_output_scale_b, - const int16_t* cell_to_input_weight_ptr, - int32_t effective_cell_to_input_scale_a, - int32_t effective_cell_to_input_scale_b, - const int16_t* cell_to_forget_weight_ptr, - int32_t effective_cell_to_forget_scale_a, - int32_t effective_cell_to_forget_scale_b, - const int16_t* cell_to_output_weight_ptr, - int32_t effective_cell_to_output_scale_a, - int32_t effective_cell_to_output_scale_b, - const int8_t* projection_weight_ptr, int32_t effective_proj_scale_a, - int32_t effective_proj_scale_b, int32_t hidden_zp, - int32_t effective_hidden_scale_a, int32_t effective_hidden_scale_b, - const int16_t* layer_norm_input_weight_ptr, - int32_t layer_norm_input_scale_a, int32_t layer_norm_input_scale_b, - const int16_t* layer_norm_forget_weight_ptr, - int32_t layer_norm_forget_scale_a, int32_t layer_norm_forget_scale_b, - const int16_t* layer_norm_cell_weight_ptr, int32_t layer_norm_cell_scale_a, - int32_t layer_norm_cell_scale_b, - const int16_t* layer_norm_output_weight_ptr, - int32_t layer_norm_output_scale_a, int32_t layer_norm_output_scale_b, - const int32_t* input_gate_bias_ptr, const int32_t* forget_gate_bias_ptr, - const int32_t* cell_gate_bias_ptr, const int32_t* output_gate_bias_ptr, - int16_t quantized_cell_clip, int8_t quantized_proj_clip, - int32_t cell_state_scale, int32_t input_variance_guard, - int32_t forget_variance_guard, int32_t cell_variance_guard, - int32_t output_variance_guard, - const int32_t* input_to_forget_effective_bias, - const int32_t* recurrent_to_forget_effective_bias, - const int32_t* input_to_cell_effective_bias, - const int32_t* recurrent_to_cell_effective_bias, - const int32_t* input_to_output_effective_bias, - const int32_t* recurrent_to_output_effective_bias, - const int32_t* input_to_input_effective_bias, - const int32_t* recurrent_to_input_effective_bias, - const int32_t* projection_effective_bias, int n_batch, int n_cell, - int n_input, int n_output, int8_t* output_state_ptr, - int32_t output_state_zp, int16_t* cell_state_ptr, int8_t* output_ptr, - int16_t* scratch0, int16_t* scratch1, int16_t* scratch2, int16_t* scratch3, - int8_t* scratch4, int32_t* scratch5); -} // namespace lstm_internal + CellType* fc_output_buffer, const TfLiteFusedActivation activation) { + const auto gate_output_shape = 
step_info.StateShape(); + // Check offset validity to avoid memory overflow + TFLITE_DCHECK_LE(step_info.InputOffset() + step_info.InputShape().FlatSize(), + tflite::micro::GetTensorShape(input).FlatSize()); + TFLITE_DCHECK_LE( + step_info.HiddenStateOffset() + step_info.StateShape().FlatSize(), + tflite::micro::GetTensorShape(recurrent).FlatSize()); -// Pamameters for integer LSTM. -// Consider split this into two Integer Parameters if more fields are added. -struct IntegerLstmParameter { - int32_t effective_input_to_input_scale_a = 0; - int32_t effective_input_to_input_scale_b = 0; - int32_t effective_recurrent_to_input_scale_a = 0; - int32_t effective_recurrent_to_input_scale_b = 0; - int32_t effective_cell_to_input_scale_a = 0; - int32_t effective_cell_to_input_scale_b = 0; - int32_t effective_input_to_forget_scale_a = 0; - int32_t effective_input_to_forget_scale_b = 0; - int32_t effective_recurrent_to_forget_scale_a = 0; - int32_t effective_recurrent_to_forget_scale_b = 0; - int32_t effective_cell_to_forget_scale_a = 0; - int32_t effective_cell_to_forget_scale_b = 0; - int32_t effective_input_to_cell_scale_a = 0; - int32_t effective_input_to_cell_scale_b = 0; - int32_t effective_recurrent_to_cell_scale_a = 0; - int32_t effective_recurrent_to_cell_scale_b = 0; - int32_t effective_input_to_output_scale_a = 0; - int32_t effective_input_to_output_scale_b = 0; - int32_t effective_recurrent_to_output_scale_a = 0; - int32_t effective_recurrent_to_output_scale_b = 0; - int32_t effective_cell_to_output_scale_a = 0; - int32_t effective_cell_to_output_scale_b = 0; - int32_t effective_proj_scale_a = 0; - int32_t effective_proj_scale_b = 0; - int32_t effective_hidden_scale_a = 0; - int32_t effective_hidden_scale_b = 0; - int32_t layer_norm_input_scale_a = 0; - int32_t layer_norm_input_scale_b = 0; - int32_t layer_norm_forget_scale_a = 0; - int32_t layer_norm_forget_scale_b = 0; - int32_t layer_norm_cell_scale_a = 0; - int32_t layer_norm_cell_scale_b = 0; - int32_t layer_norm_output_scale_a = 0; - int32_t layer_norm_output_scale_b = 0; - // Quantized clip value for cell and projection. Zero value means no - // clipping. - int16_t quantized_cell_clip = 0; - int8_t quantized_proj_clip = 0; - int32_t hidden_zp = 0; - int32_t cell_scale = 0; - - int32_t input_variance_guard = 0; - int32_t forget_variance_guard = 0; - int32_t cell_variance_guard = 0; - int32_t output_variance_guard = 0; - - // Pre-calculate bias + zero_point * weight. - int32_t* input_to_forget_effective_bias = nullptr; - int32_t* recurrent_to_forget_effective_bias = nullptr; - int32_t* input_to_cell_effective_bias = nullptr; - int32_t* recurrent_to_cell_effective_bias = nullptr; - int32_t* input_to_output_effective_bias = nullptr; - int32_t* recurrent_to_output_effective_bias = nullptr; - int32_t* input_to_input_effective_bias = nullptr; - int32_t* recurrent_to_input_effective_bias = nullptr; - int32_t* projection_effective_bias = nullptr; - - // Scale and zero point for intermediate tensors. - // Used only in the 8x8_8 case. 
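The gate formula implemented by CalculateLstmGate, together with the cell and hidden-state updates described next, amounts to the standard LSTM recurrence. A float-only sketch of one time step is shown below with toy sizes and weights (all four gates reuse a single weight matrix purely to keep the example short; the kernel of course uses separate weights per gate, and its quantized path applies the same structure in int8/int16 arithmetic):

#include <cmath>
#include <cstdio>
#include <vector>

// One LSTM step for a single batch, mirroring the formulas used by
// CalculateLstmGate / UpdateLstmCell / UpdateLstmHidden:
//   gate = activate(W_gate * x + U_gate * h + b_gate)
//   c    = f (*) c + i (*) g        ((*) = elementwise multiply)
//   h    = o (*) tanh(c)
using Vec = std::vector<float>;
using Mat = std::vector<Vec>;  // row-major [n_state][n_in]

Vec MatVecAdd(const Mat& w, const Vec& x, const Mat& u, const Vec& h,
              const Vec& b) {
  Vec out(b);  // start from the bias
  for (size_t r = 0; r < out.size(); ++r) {
    for (size_t c = 0; c < x.size(); ++c) out[r] += w[r][c] * x[c];
    for (size_t c = 0; c < h.size(); ++c) out[r] += u[r][c] * h[c];
  }
  return out;
}

Vec Activate(Vec v, bool use_tanh) {
  for (float& x : v) x = use_tanh ? std::tanh(x) : 1.0f / (1.0f + std::exp(-x));
  return v;
}

int main() {
  const Vec x = {0.5f, -1.0f};              // n_input = 2
  Vec h = {0.0f, 0.0f}, c = {0.0f, 0.0f};   // n_state = 2
  const Mat w = {{0.1f, 0.2f}, {-0.3f, 0.4f}};  // toy weights, shared by gates
  const Vec b = {0.0f, 0.1f};
  const Vec f = Activate(MatVecAdd(w, x, w, h, b), false);  // forget gate
  const Vec i = Activate(MatVecAdd(w, x, w, h, b), false);  // input gate
  const Vec g = Activate(MatVecAdd(w, x, w, h, b), true);   // cell gate
  const Vec o = Activate(MatVecAdd(w, x, w, h, b), false);  // output gate
  for (size_t k = 0; k < c.size(); ++k) {
    c[k] = f[k] * c[k] + i[k] * g[k];
    h[k] = o[k] * std::tanh(c[k]);
  }
  std::printf("h = [%f, %f], c = [%f, %f]\n", h[0], h[1], c[0], c[1]);
  return 0;
}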
- int32_t intermediate_scale_a[8] = {}; - int32_t intermediate_scale_b[8] = {}; - int32_t intermediate_zp[12] = {}; -}; + // Input FC + FullyConnected(gate_params.input_fc_params, step_info.InputShape(), + tflite::micro::GetTensorData(input) + + step_info.InputOffset(), + micro::GetTensorShape(input_weight), + tflite::micro::GetTensorData(input_weight), + tflite::micro::GetTensorShape(input_bias), + tflite::micro::GetOptionalTensorData(input_bias), + gate_output_shape, gate_output); + + // Recurrent FC + FullyConnected(gate_params.recurrent_fc_params, step_info.StateShape(), + tflite::micro::GetTensorData(recurrent) + + step_info.HiddenStateOffset(), + tflite::micro::GetTensorShape(recurrent_weight), + tflite::micro::GetTensorData(recurrent_weight), + tflite::micro::GetTensorShape(recurrent_bias), + tflite::micro::GetOptionalTensorData(recurrent_bias), + gate_output_shape, fc_output_buffer); + + AddElementWise(gate_output, fc_output_buffer, + /*n_batch=*/gate_output_shape.DimsData()[0], + /*n_state=*/gate_output_shape.DimsData()[1], gate_output); + // Apply activation + switch (activation) { + case kTfLiteActSigmoid: + Sigmoid(gate_output_shape, gate_output); + break; + case kTfLiteActTanh: { + // Set the scale power to -12 to avoid shift + Tanh(/*cell_state_scale_power=*/-12, gate_output_shape, gate_output, + gate_output_shape, gate_output); + } break; + default: + // Only Sigmoid or Tanh is used. + TFLITE_ASSERT_FALSE; + } +} -TfLiteStatus EvalFloatLstm( - const TfLiteEvalTensor* input, - const TfLiteEvalTensor* input_to_input_weights, - const TfLiteEvalTensor* input_to_forget_weights, - const TfLiteEvalTensor* input_to_cell_weights, - const TfLiteEvalTensor* input_to_output_weights, - const TfLiteEvalTensor* recurrent_to_input_weights, - const TfLiteEvalTensor* recurrent_to_forget_weights, - const TfLiteEvalTensor* recurrent_to_cell_weights, - const TfLiteEvalTensor* recurrent_to_output_weights, - const TfLiteEvalTensor* cell_to_input_weights, - const TfLiteEvalTensor* cell_to_forget_weights, - const TfLiteEvalTensor* cell_to_output_weights, - const TfLiteEvalTensor* input_layer_norm_coefficients, - const TfLiteEvalTensor* forget_layer_norm_coefficients, - const TfLiteEvalTensor* cell_layer_norm_coefficients, - const TfLiteEvalTensor* output_layer_norm_coefficients, - const TfLiteEvalTensor* aux_input, - const TfLiteEvalTensor* aux_input_to_input_weights, - const TfLiteEvalTensor* aux_input_to_forget_weights, - const TfLiteEvalTensor* aux_input_to_cell_weights, - const TfLiteEvalTensor* aux_input_to_output_weights, - const TfLiteEvalTensor* input_gate_bias, - const TfLiteEvalTensor* forget_gate_bias, - const TfLiteEvalTensor* cell_gate_bias, - const TfLiteEvalTensor* output_gate_bias, - const TfLiteEvalTensor* projection_weights, - const TfLiteEvalTensor* projection_bias, const TfLiteLSTMParams* params, - bool forward_sequence, bool time_major, int output_offset, - float* scratch_buffer, TfLiteEvalTensor* output_state, - TfLiteEvalTensor* cell_state, TfLiteEvalTensor* output); - -TfLiteStatus EvalInteger8x8_16Lstm( - const TfLiteEvalTensor* input, - const TfLiteEvalTensor* input_to_input_weights, - const TfLiteEvalTensor* input_to_forget_weights, - const TfLiteEvalTensor* input_to_cell_weights, - const TfLiteEvalTensor* input_to_output_weights, - const TfLiteEvalTensor* recurrent_to_input_weights, - const TfLiteEvalTensor* recurrent_to_forget_weights, - const TfLiteEvalTensor* recurrent_to_cell_weights, - const TfLiteEvalTensor* recurrent_to_output_weights, - const TfLiteEvalTensor* 
cell_to_input_weights, - const TfLiteEvalTensor* cell_to_forget_weights, - const TfLiteEvalTensor* cell_to_output_weights, - const TfLiteEvalTensor* input_layer_norm_coefficients, - const TfLiteEvalTensor* forget_layer_norm_coefficients, - const TfLiteEvalTensor* cell_layer_norm_coefficients, - const TfLiteEvalTensor* output_layer_norm_coefficients, - const TfLiteEvalTensor* input_gate_bias, - const TfLiteEvalTensor* forget_gate_bias, - const TfLiteEvalTensor* cell_gate_bias, - const TfLiteEvalTensor* output_gate_bias, - const TfLiteEvalTensor* projection_weights, - const TfLiteEvalTensor* projection_bias, const TfLiteLSTMParams* params, - bool forward_sequence, bool time_major, - const IntegerLstmParameter* integer_lstm_param, int32_t output_state_zp, - TfLiteEvalTensor* output_state, TfLiteEvalTensor* cell_state, - TfLiteEvalTensor* output, int16_t* scratch0, int16_t* scratch1, - int16_t* scratch2, int16_t* scratch3, int8_t* scratch4, int32_t* scratch5); +// Update the cell state using the output from the forget gate, input gate, and +// cell gate Formula: updated_cell_state = forget_gate_output*cell_state + +// input_gate_output * cell_gate_output, where * denotes element wise +// multiplication +template +void UpdateLstmCell(const LstmStepManager& step_info, + TfLiteEvalTensor* cell_state, + // Gate outputs + CellType* forget_gate_output, + const CellType* input_gate_output, + const CellType* cell_gate_output, + // Mul parameters + const ArithmeticParams& forget_cell_mul_params, + const ArithmeticParams& input_mul_params, + const CellStateInfo& cell_state_info, CellType* buffer) { + // Check offset validity to avoid memory overflow + TFLITE_DCHECK_LE( + step_info.CellStateOffset() + step_info.StateShape().FlatSize(), + tflite::micro::GetTensorShape(cell_state).FlatSize()); + auto cell_state_shape = step_info.StateShape(); + // Forget Gate x Cell State + Mul(cell_state_shape, forget_cell_mul_params, forget_gate_output, + tflite::micro::GetTensorData(cell_state) + + step_info.CellStateOffset(), + tflite::micro::GetTensorData(cell_state) + + step_info.CellStateOffset()); + // Input Gate x Cell Gate + Mul(cell_state_shape, input_mul_params, input_gate_output, cell_gate_output, + buffer); + + // Update the cell state + AddElementWise(tflite::micro::GetTensorData(cell_state) + + step_info.CellStateOffset(), + buffer, + /*n_batch=*/cell_state_shape.DimsData()[0], + /*n_state=*/cell_state_shape.DimsData()[1], + tflite::micro::GetTensorData(cell_state) + + step_info.CellStateOffset()); + + if (cell_state_info.cell_clip > 0) { + Clipping(cell_state_shape.FlatSize(), cell_state_info, + tflite::micro::GetTensorData(cell_state) + + step_info.CellStateOffset()); + } +} + +// Update the hidden state of the LSTM kernel using the following formula: +// updated_hidden_state = Tanh(updated_cell_state) * output_gate_output, * means +// element wise multiplication +template +void UpdateLstmHidden(const LstmStepManager& step_info, + TfLiteEvalTensor* cell_state, + TfLiteEvalTensor* hidden_state, + const CellType* output_gate_output, + const ArithmeticParams& mul_params, + int32_t cell_state_scale_power, CellType* buffer) { + // Check offset validity to avoid memory overflow + TFLITE_DCHECK_LE( + step_info.CellStateOffset() + step_info.StateShape().FlatSize(), + tflite::micro::GetTensorShape(cell_state).FlatSize()); + TFLITE_DCHECK_LE( + step_info.HiddenStateOffset() + step_info.StateShape().FlatSize(), + tflite::micro::GetTensorShape(hidden_state).FlatSize()); + + auto cell_state_shape = 
step_info.StateShape(); + CellType* cell_state_data = + tflite::micro::GetTensorData(cell_state) + + step_info.CellStateOffset(); + // Tanh(cell_state) + Tanh(cell_state_scale_power, cell_state_shape, cell_state_data, + cell_state_shape, buffer); + // Update the hidden state + Mul(cell_state_shape, mul_params, buffer, output_gate_output, + tflite::micro::GetTensorData(hidden_state) + + step_info.HiddenStateOffset()); +} + +template +void LstmStep(const LstmStepManager& step_info, const OpDataLSTM& op_data, + LSTMKernelContents& kernel_content, + const LSTMBuffers& buffers) { + /*Step1: Calculate gate outputs to prepare cell state update*/ + CellType* gate_internal_buffer = buffers.buffer3; + CellType* forget_gate_output = buffers.buffer0; + CalculateLstmGate( + step_info, op_data.forget_gate_parameters, + // Input FC + kernel_content.GetInternalTensor(tflite::kLstmInputTensor), + kernel_content.GetInternalTensor(tflite::kLstmInputToForgetWeightsTensor), + kernel_content.GetInternalTensor(tflite::kLstmForgetGateBiasTensor), + // Recurrent FC + kernel_content.HiddenStateTensor(), + kernel_content.GetInternalTensor( + tflite::kLstmRecurrentToForgetWeightsTensor), + /*recurrent_bias*/ nullptr, + // Output + forget_gate_output, + // Scratch arrays + gate_internal_buffer, kTfLiteActSigmoid); + + // Input Gate calculation; + CellType* input_gate_output = buffers.buffer1; + CalculateLstmGate( + step_info, op_data.input_gate_parameters, + // Input FC + kernel_content.GetInternalTensor(tflite::kLstmInputTensor), + kernel_content.GetInternalTensor(tflite::kLstmInputToInputWeightsTensor), + kernel_content.GetInternalTensor(tflite::kLstmInputGateBiasTensor), + // Recurrent FC + kernel_content.HiddenStateTensor(), + kernel_content.GetInternalTensor( + tflite::kLstmRecurrentToInputWeightsTensor), + /*recurrent_bias*/ nullptr, + // Output + input_gate_output, + // Scratch arrays + gate_internal_buffer, kTfLiteActSigmoid); + + // Cell Gate calculation + CellType* cell_gate_output = buffers.buffer2; + CalculateLstmGate( + step_info, op_data.cell_gate_parameters, + // Input FC + kernel_content.GetInternalTensor(tflite::kLstmInputTensor), + kernel_content.GetInternalTensor(tflite::kLstmInputToCellWeightsTensor), + kernel_content.GetInternalTensor(tflite::kLstmCellGateBiasTensor), + // Recurrent FC + kernel_content.HiddenStateTensor(), + kernel_content.GetInternalTensor( + tflite::kLstmRecurrentToCellWeightsTensor), + /*recurrent_bias*/ nullptr, + // Output + cell_gate_output, + // Scratch arrays + gate_internal_buffer, op_data.cell_gate_nonlinear_type); + + /*Step2: update the cell state */ + const InterGateParameters& inter_gate_params = op_data.inter_gate_parameters; + CellType* updated_input_buffer = buffers.buffer1; // reuse buffer + + UpdateLstmCell(step_info, kernel_content.CellStateTensor(), + forget_gate_output, input_gate_output, + cell_gate_output, + inter_gate_params.forget_cell_mul_params, + inter_gate_params.input_mul_params, + op_data.cell_state_info, updated_input_buffer); + + /*Step3: update the hidden state */ + CellType* output_gate_output = buffers.buffer1; // reuse buffer + CalculateLstmGate( + step_info, op_data.output_gate_parameters, + // Input FC + kernel_content.GetInternalTensor(tflite::kLstmInputTensor), + kernel_content.GetInternalTensor(tflite::kLstmInputToOutputWeightsTensor), + kernel_content.GetInternalTensor(tflite::kLstmOutputGateBiasTensor), + // Recurrent FC + kernel_content.HiddenStateTensor(), + kernel_content.GetInternalTensor( + 
tflite::kLstmRecurrentToOutputWeightsTensor), + /*recurrent_bias*/ nullptr, + // Output + output_gate_output, + // Scratch arrays + gate_internal_buffer, kTfLiteActSigmoid); + + CellType* tanh_activated_cell_buffer = buffers.buffer0; // reuse buffer + tflite::lstm_internal::UpdateLstmHidden( + step_info, kernel_content.CellStateTensor(), + kernel_content.HiddenStateTensor(), output_gate_output, + inter_gate_params.output_mul_params, + op_data.cell_state_info.cell_state_scale_power, + tanh_activated_cell_buffer); + + /*Step4: copy the update the hidden state to output*/ + // Check offset validity to avoid memory overflow + TFLITE_DCHECK_LE( + step_info.OutputOffset() + step_info.StateShape().FlatSize(), + tflite::micro::GetTensorShape(kernel_content.output_tensor).FlatSize()); + // record the output (from the updated hidden state) + ActivationType* output_ptr = tflite::micro::GetTensorData( + kernel_content.output_tensor); + const auto* hidden_state = kernel_content.HiddenStateTensor(); + std::memcpy(output_ptr + step_info.OutputOffset(), + tflite::micro::GetTensorData(hidden_state) + + step_info.HiddenStateOffset(), + step_info.StateShape().FlatSize() * sizeof(ActivationType)); +} + +} // namespace lstm_internal + +// Evaulate the LSTM kernel with (potential) multi-steps and multi-batch input +// Since +template +TfLiteStatus EvalLstm(const OpDataLSTM& op_data, + LSTMKernelContents& kernel_content, + const LSTMBuffers& buffers) { + lstm_internal::LstmStepManager step_info(&op_data.size_info); + const auto& size_info = op_data.size_info; + // time is the first dimention, enable batch computation + if (size_info.time_major) { + for (int t = 0; t < size_info.time_steps; t++) { + lstm_internal::LstmStep( + step_info, op_data, kernel_content, buffers); + // prepare for the next time step + step_info.UpdateTime(); + } + } else { + // batch first, unable to size the input data. single batch inference + for (int b = 0; b < size_info.batch_size; b++) { + for (int t = 0; t < size_info.time_steps; t++) { + lstm_internal::LstmStep( + step_info, op_data, kernel_content, buffers); + // prepare for the next time step + step_info.UpdateTime(); + } + // prepare for the next batch + step_info.UpdateBatch(); + step_info.ResetTime(); + } + } + return kTfLiteOk; +} } // namespace tflite -#endif // TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_H_ + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_16ACT_H_ diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval_common.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval_common.cc new file mode 100644 index 000000000..22a6d4600 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval_common.cc @@ -0,0 +1,326 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/lite/kernels/internal/quantization_util.h" +#include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/fully_connected.h" +#include "tensorflow/lite/micro/kernels/lstm_eval.h" + +namespace tflite { + +// Deduce the size information (Batch (B), Time Steps (T), Input dimension (I), +// State dimension (S)) that defines the LSTM using the input and hidden state +// tensor +LstmSizeInfo CreateLstmSizeInfo( + const bool time_major, const TfLiteIntArray* input_tensor_shape, + const TfLiteIntArray* hidden_state_tensor_shape) { + LstmSizeInfo size_info; + size_info.time_major = time_major; + size_info.batch_size = + time_major ? input_tensor_shape->data[1] : input_tensor_shape->data[0]; + size_info.time_steps = + time_major ? input_tensor_shape->data[0] : input_tensor_shape->data[1]; + size_info.input_dimension = input_tensor_shape->data[2]; + size_info.state_dimension = hidden_state_tensor_shape->data[1]; + return size_info; +} + +TfLiteStatus ValidateWeightTensorSize(TfLiteContext* context, + const TfLiteTensor* tensor, int dim1_size, + int dim2_size) { + TF_LITE_ENSURE_EQ(context, tensor->dims->size, 2); + TF_LITE_ENSURE_EQ(context, tensor->dims->data[0], dim1_size); + TF_LITE_ENSURE_EQ(context, tensor->dims->data[1], dim2_size); + return kTfLiteOk; +} + +TfLiteStatus ValidateBiasTensorSize(TfLiteContext* context, + const TfLiteTensor* tensor, int size) { + TF_LITE_ENSURE_EQ(context, tensor->dims->size, 1); + TF_LITE_ENSURE_EQ(context, tensor->dims->data[0], size); + return kTfLiteOk; +} + +// Go through every tensors and make sure their shape match the kernel +// configuration +TfLiteStatus ValidateTensorSize(TfLiteContext* context, + const LstmTensors& tensors, + const LstmSizeInfo& size_info) { + // Input FC weights + for (size_t i = 1; i < 5; i++) { + TF_LITE_ENSURE_OK( + context, ValidateWeightTensorSize(context, tensors.GetInternalTensor(i), + size_info.state_dimension, + size_info.input_dimension)); + } + // Recurrent FC weights + for (size_t i = 5; i < 9; i++) { + TF_LITE_ENSURE_OK( + context, ValidateWeightTensorSize(context, tensors.GetInternalTensor(i), + size_info.state_dimension, + size_info.state_dimension)); + } + // Biases + for (size_t i = 12; i < 16; i++) { + TF_LITE_ENSURE_OK( + context, ValidateBiasTensorSize(context, tensors.GetInternalTensor(i), + size_info.state_dimension)); + } + + // Check the shape of input state tensors. + // These tensor may be 1D or 2D. It's fine as long as the total size is + // correct. 
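// (Editor's note, illustrative only: with hypothetical values batch_size = 2 and
//  state_dimension = 4, both the hidden and cell state tensors must hold 2 * 4 = 8
//  elements, whether shaped {2, 4} or {8}; the checks that follow compare element
//  counts only, not the exact rank.)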
+ TF_LITE_ENSURE_EQ(context, NumElements(tensors.HiddenStateTensor()), + size_info.batch_size * size_info.state_dimension); + TF_LITE_ENSURE_EQ(context, NumElements(tensors.CellStateTensor()), + size_info.batch_size * size_info.state_dimension); + + // Check the shape of output tensor against that of input tensor + TF_LITE_ENSURE_EQ(context, tensors.OutputTensor()->dims->size, 3); + TF_LITE_ENSURE_EQ(context, + tensors.GetInternalTensor(kLstmInputTensor)->dims->data[0], + tensors.OutputTensor()->dims->data[0]); + TF_LITE_ENSURE_EQ(context, + tensors.GetInternalTensor(kLstmInputTensor)->dims->data[1], + tensors.OutputTensor()->dims->data[1]); + TF_LITE_ENSURE_EQ(context, tensors.OutputTensor()->dims->data[2], + size_info.state_dimension); + return kTfLiteOk; +} + +// Wrapper function to create gate parameters for the four internal LSTM gates +TfLiteStatus CreateGateParams( + TfLiteContext* context, + /*Input tensors*/ + const TfLiteTensor* input, const TfLiteTensor* input_weight, + const TfLiteTensor* input_bias, + /*Hidden state tensors*/ + const TfLiteTensor* hidden_state, const TfLiteTensor* hidden_state_weight, + const TfLiteTensor* hidden_state_bias, + /*Scale of the fc output (input to non-linear activation)*/ + const float nonlinear_activation_input_scale, const TfLiteType cell_type, + tflite::GateParameters& gate_params) { + // A temp tflite tensor to represent the output of fc operation. Only the data + // type and quantization parameters are set since it is only used for + // parameter calculations + TfLiteTensor fc_output_temp; + fc_output_temp.type = cell_type; + fc_output_temp.params.scale = nonlinear_activation_input_scale; + fc_output_temp.params.zero_point = 0; // symmetrical quantized + + // A temp fc opdata to reuse the helper function on creating fc parameters + tflite::OpDataFullyConnected fc_data_temp; + // TODO(b/265853320): due to the lack of precision for the float scale, + // scale_diff / output_scale <= 0.02 (potentially requires 1e-8 precision) can + // not be satisified for the bias. Here we rely on the correctiveness of the + // conversion process (set input_bias=nullptr to avoid checking) for + // tensor scales + TF_LITE_ENSURE_STATUS(CalculateOpDataFullyConnected( + context, kTfLiteActNone, input->type, input, input_weight, + /*input_bias=*/nullptr, &fc_output_temp, &fc_data_temp)); + gate_params.input_fc_params = FullyConnectedParamsQuantized(fc_data_temp); + double real_multiplier = 0.0; + GetQuantizedConvolutionMultipler(context, input, input_weight, nullptr, + &fc_output_temp, &real_multiplier); + + TF_LITE_ENSURE_STATUS(CalculateOpDataFullyConnected( + context, kTfLiteActNone, hidden_state->type, hidden_state, + hidden_state_weight, hidden_state_bias, &fc_output_temp, &fc_data_temp)); + gate_params.recurrent_fc_params = FullyConnectedParamsQuantized(fc_data_temp); + return kTfLiteOk; +} + +// Create parameters for element wise multiplication that happens in a) cell +// state update ; b) hidden state update +// Note that all the output of gates are symmetrically quantized so only scales +// are required for input. However, during the hidden state update phase, the +// output is the updated hidden state, which is asymmetrically quantized. 
Thus +// output may require zero point +tflite::ArithmeticParams CreateInterGateMulParams(const float input1_scale, + const float input2_scale, + const float output_scale, + const TfLiteType output_type, + const int output_zp) { + tflite::ArithmeticParams op_params = {}; + if (output_type == kTfLiteInt16) { + op_params.quantized_activation_min = std::numeric_limits::min(); + op_params.quantized_activation_max = std::numeric_limits::max(); + } else if (output_type == kTfLiteInt8) { + op_params.quantized_activation_min = std::numeric_limits::min(); + op_params.quantized_activation_max = std::numeric_limits::max(); + } + + op_params.input1_offset = 0; // symmetric + op_params.input2_offset = 0; // symmetric + op_params.output_offset = output_zp; + + const double input_product_scale = + static_cast(input1_scale) * static_cast(input2_scale); + double effective_scale = + input_product_scale / static_cast(output_scale); + + QuantizeMultiplier(effective_scale, &op_params.output_multiplier, + &op_params.output_shift); + return op_params; +} + +// Create the additional information about the cell state, which include: +// cell_state_scale_power: used in integer nonlinear function (e.g., tanh) +// quantized_cell_clip: quantized cell clip range +CellStateInfo CreateLstmCellStateInfo(const float cell_state_scale, + const float cell_clip) { + CellStateInfo cell_state_info; + // cell_state_scale_power: 2^-cell_state_scale_power = cell state scale + int buffer; + tflite::CheckedLog2(cell_state_scale, &buffer); + cell_state_info.cell_state_scale_power = buffer; + // Cell state specifics + cell_state_info.cell_clip = cell_clip; + cell_state_info.quantized_cell_clip = static_cast( + std::min(std::max(static_cast(cell_clip) / + static_cast(cell_state_scale), + -32768.0), + 32767.0)); + + return cell_state_info; +} + +CellStateInfo CreateLstmCellStateInfoFloat(const float cell_clip) { + CellStateInfo cell_state_info; + cell_state_info.cell_clip = cell_clip; + cell_state_info.cell_state_scale_power = 0; // no quantization + cell_state_info.quantized_cell_clip = 0; // no quantization + return cell_state_info; +} + +tflite::FullyConnectedParams CreateFCParamsFloat() { + FullyConnectedParams op_params; + CalculateActivationRange(kTfLiteActNone, &op_params.float_activation_min, + &op_params.float_activation_max); + return op_params; +} + +tflite::GateParameters CreateGateParamsFloat() { + tflite::GateParameters gate_params = {}; + gate_params.input_fc_params = CreateFCParamsFloat(); + gate_params.recurrent_fc_params = CreateFCParamsFloat(); + return gate_params; +} + +tflite::ArithmeticParams CreateInterGateMulParamsFloat() { + tflite::ArithmeticParams op_params = {}; + CalculateActivationRange(kTfLiteActNone, &op_params.float_activation_min, + &op_params.float_activation_max); + return op_params; +} + +TfLiteStatus PrepareGateParametersFloat(TfLiteContext* context, + const LstmTensors& lstm_tensors, + OpDataLSTM* op_data_lstm) { + // Gate Parameters + op_data_lstm->forget_gate_parameters = CreateGateParamsFloat(); + op_data_lstm->input_gate_parameters = CreateGateParamsFloat(); + op_data_lstm->cell_gate_parameters = CreateGateParamsFloat(); + op_data_lstm->output_gate_parameters = CreateGateParamsFloat(); + // Inter gate multiplication parameters + op_data_lstm->inter_gate_parameters.forget_cell_mul_params = + CreateInterGateMulParamsFloat(); + op_data_lstm->inter_gate_parameters.input_mul_params = + CreateInterGateMulParamsFloat(); + op_data_lstm->inter_gate_parameters.output_mul_params = + 
CreateInterGateMulParamsFloat(); + return kTfLiteOk; +} + +TfLiteStatus PrepareGateParametersInteger(TfLiteContext* context, + const LstmTensors& lstm_tensors, + OpDataLSTM* op_data_lstm) { + float nonlinear_input_scale = 0.00024414062; // 2^-12 Q3.12 -> Q0.15 + TF_LITE_ENSURE_OK( + context, + CreateGateParams( + context, lstm_tensors.GetInternalTensor(kLstmInputTensor), + lstm_tensors.GetInternalTensor(kLstmInputToForgetWeightsTensor), + lstm_tensors.GetInternalTensor(kLstmForgetGateBiasTensor), + lstm_tensors.GetInternalTensor(kLstmOutputStateTensor), + lstm_tensors.GetInternalTensor(kLstmRecurrentToForgetWeightsTensor), + /*hidden_state_bias=*/nullptr, nonlinear_input_scale, kTfLiteInt16, + op_data_lstm->forget_gate_parameters)); + TF_LITE_ENSURE_OK( + context, + CreateGateParams( + context, lstm_tensors.GetInternalTensor(kLstmInputTensor), + lstm_tensors.GetInternalTensor(kLstmInputToInputWeightsTensor), + lstm_tensors.GetInternalTensor(kLstmInputGateBiasTensor), + lstm_tensors.GetInternalTensor(kLstmOutputStateTensor), + lstm_tensors.GetInternalTensor(kLstmRecurrentToInputWeightsTensor), + /*hidden_state_bias=*/nullptr, nonlinear_input_scale, kTfLiteInt16, + op_data_lstm->input_gate_parameters)); + TF_LITE_ENSURE_OK( + context, + CreateGateParams( + context, lstm_tensors.GetInternalTensor(kLstmInputTensor), + lstm_tensors.GetInternalTensor(kLstmInputToCellWeightsTensor), + lstm_tensors.GetInternalTensor(kLstmCellGateBiasTensor), + lstm_tensors.GetInternalTensor(kLstmOutputStateTensor), + lstm_tensors.GetInternalTensor(kLstmRecurrentToCellWeightsTensor), + /*hidden_state_bias=*/nullptr, nonlinear_input_scale, kTfLiteInt16, + op_data_lstm->cell_gate_parameters)); + TF_LITE_ENSURE_OK( + context, + CreateGateParams( + context, lstm_tensors.GetInternalTensor(kLstmInputTensor), + lstm_tensors.GetInternalTensor(kLstmInputToOutputWeightsTensor), + lstm_tensors.GetInternalTensor(kLstmOutputGateBiasTensor), + lstm_tensors.GetInternalTensor(kLstmOutputStateTensor), + lstm_tensors.GetInternalTensor(kLstmRecurrentToOutputWeightsTensor), + /*hidden_state_bias=*/nullptr, nonlinear_input_scale, kTfLiteInt16, + op_data_lstm->output_gate_parameters)); + + // Inter gate multiplication parameters + float nonlinear_output_scale = 0.00003051757; // 2^-15 Q3.12 -> Q0.15 + float cell_state_scale = lstm_tensors.CellStateTensor()->params.scale; + // forget gate output (nonlinear output) x cell state -> cell state + op_data_lstm->inter_gate_parameters.forget_cell_mul_params = + CreateInterGateMulParams(nonlinear_output_scale, cell_state_scale, + cell_state_scale, kTfLiteInt16); + // input gate output x cell gate output -> cell state + op_data_lstm->inter_gate_parameters.input_mul_params = + CreateInterGateMulParams(nonlinear_output_scale, nonlinear_output_scale, + cell_state_scale, kTfLiteInt16); + // tanh output x output gate output -> hidden state (potentially asymmetric) + op_data_lstm->inter_gate_parameters.output_mul_params = + CreateInterGateMulParams( + nonlinear_output_scale, nonlinear_output_scale, + lstm_tensors.HiddenStateTensor()->params.scale, + lstm_tensors.HiddenStateTensor()->type, + lstm_tensors.HiddenStateTensor()->params.zero_point); + return kTfLiteOk; +} + +LSTMKernelContents CreateLSTMKernelContent(TfLiteContext* context, + TfLiteNode* node) { + LSTMKernelContents kernel_content; + // Point to correct tensors + for (size_t i = 0; i < 24; i++) { + kernel_content.internal_tensors[i] = + tflite::micro::GetMutableEvalInput(context, node, i); + } + // Output tensor + 
kernel_content.output_tensor = tflite::micro::GetEvalOutput(context, node, 0); + return kernel_content; +} + +} // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval_test.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval_test.h new file mode 100644 index 000000000..aee12cf39 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval_test.h @@ -0,0 +1,817 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_TEST_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_TEST_H_ + +#include +#include + +#include "tensorflow/lite/micro/kernels/lstm_eval.h" +#include "tensorflow/lite/micro/kernels/testdata/lstm_test_data.h" +#include "tensorflow/lite/micro/test_helpers.h" +#include "tensorflow/lite/micro/testing/micro_test.h" + +namespace tflite { +namespace testing { + +/*Helper Functions (mainly about mimicking the kernel preparation)*/ + +// Create fully connected parameters using quantization settings of input and +// weight tensors. +// Since TfLiteContext is not available during the kernel test, here we mimic +// (put into stack memory) CalculateOpDataFullyConnected in +// tensorflow/lite/micro/kernels/fully_connected_common.cc +template +tflite::FullyConnectedParams CreateFCParams( + const TensorQuantizationParameters& input_quant_params, + const TensorQuantizationParameters& weight_quant_params, + const float nonlinear_activation_input_scale) { + OpDataFullyConnected data; + const double input_product_scale = + input_quant_params.scale * weight_quant_params.scale; + double effective_scale = + input_product_scale / + static_cast(nonlinear_activation_input_scale); + + QuantizeMultiplier(effective_scale, &data.output_multiplier, + &data.output_shift); + + data.input_zero_point = input_quant_params.zero_point; + + data.filter_zero_point = 0; // symmetrically quantized + data.output_zero_point = 0; // symmetrically quantized + + data.output_activation_min = std::numeric_limits::min(); + data.output_activation_max = std::numeric_limits::max(); + + return tflite::FullyConnectedParamsQuantized(data); +} + +inline tflite::FullyConnectedParams CreateFCParamsFloat() { + FullyConnectedParams op_params; + CalculateActivationRange(kTfLiteActNone, &op_params.float_activation_min, + &op_params.float_activation_max); + return op_params; +} + +// Wrapper function to create gate parameters for the four internal LSTM gates +template +tflite::GateParameters CreateGateParams( + const TensorQuantizationParameters& input_quant_params, + const TensorQuantizationParameters& hidden_state_quant_params, + const GateQuantizationParameters& gate_quantization_settings, + const float nonlinear_activation_input_scale) { + tflite::GateParameters gate_params = {}; + gate_params.input_fc_params = CreateFCParams( + input_quant_params, 
gate_quantization_settings.activation_weight, + nonlinear_activation_input_scale); + gate_params.recurrent_fc_params = CreateFCParams( + hidden_state_quant_params, gate_quantization_settings.recurrent_weight, + nonlinear_activation_input_scale); + return gate_params; +} + +inline tflite::GateParameters CreateGateParamsFloat() { + tflite::GateParameters gate_params = {}; + gate_params.input_fc_params = CreateFCParamsFloat(); + gate_params.recurrent_fc_params = CreateFCParamsFloat(); + return gate_params; +} +// Create parameters for element wise multiplication that happens in a) cell +// state update ; b) hidden state update +// Note that all the output of gates are symmetrically quantized so only scales +// are required for input. However, during the hidden state update phase, the +// output is the updated hidden state, which is asymmetrically quantized. Thus +// output may require zero point +template +tflite::ArithmeticParams CreateInterGateMulParams(const float input1_scale, + const float input2_scale, + const float output_scale, + const int output_zp = 0) { + tflite::ArithmeticParams op_params = {}; + op_params.quantized_activation_min = std::numeric_limits::min(); + op_params.quantized_activation_max = std::numeric_limits::max(); + op_params.input1_offset = 0; + op_params.input2_offset = 0; + op_params.output_offset = output_zp; + + const double input_product_scale = + static_cast(input1_scale) * static_cast(input2_scale); + double effective_scale = + input_product_scale / static_cast(output_scale); + + QuantizeMultiplier(effective_scale, &op_params.output_multiplier, + &op_params.output_shift); + return op_params; +} + +inline tflite::ArithmeticParams CreateInterGateMulParamsFloat() { + tflite::ArithmeticParams op_params = {}; + CalculateActivationRange(kTfLiteActNone, &op_params.float_activation_min, + &op_params.float_activation_max); + return op_params; +} + +// Create the additional information about the cell state, which include: +// cell_state_scale_power: used in integer nonlinear function (e.g., tanh) +// quantized_cell_clip: quantized cell clip range +CellStateInfo CreateLstmCellStateInfo(const float cell_state_scale, + const float cell_clip) { + CellStateInfo cell_state_info; + // cell_state_scale_power: 2^-cell_state_scale_power = cell state scale + int buffer; + tflite::CheckedLog2(cell_state_scale, &buffer); + cell_state_info.cell_state_scale_power = buffer; + // Cell state specifics + cell_state_info.cell_clip = cell_clip; + cell_state_info.quantized_cell_clip = static_cast( + std::min(std::max(static_cast(cell_clip) / + static_cast(cell_state_scale), + -32768.0), + 32767.0)); + return cell_state_info; +} + +// Create LSTMKernelContents from LstmNodeContent by copying TfLiteEvalTensor +// pointers +template +LSTMKernelContents CreateLSTMKernelContent( + LstmNodeContent& + node_contents) { + LSTMKernelContents kernel_content; + // Point to correct tensors + kernel_content.internal_tensors[kLstmInputTensor] = + node_contents.GetEvalTensor(kLstmInputTensor); + kernel_content.internal_tensors[kLstmInputToInputWeightsTensor] = + node_contents.GetEvalTensor(kLstmInputToInputWeightsTensor); + kernel_content.internal_tensors[kLstmInputToForgetWeightsTensor] = + node_contents.GetEvalTensor(kLstmInputToForgetWeightsTensor); + kernel_content.internal_tensors[kLstmInputToCellWeightsTensor] = + node_contents.GetEvalTensor(kLstmInputToCellWeightsTensor); + kernel_content.internal_tensors[kLstmInputToOutputWeightsTensor] = + 
node_contents.GetEvalTensor(kLstmInputToOutputWeightsTensor); + kernel_content.internal_tensors[kLstmRecurrentToInputWeightsTensor] = + node_contents.GetEvalTensor(kLstmRecurrentToInputWeightsTensor); + kernel_content.internal_tensors[kLstmRecurrentToForgetWeightsTensor] = + node_contents.GetEvalTensor(kLstmRecurrentToForgetWeightsTensor); + kernel_content.internal_tensors[kLstmRecurrentToCellWeightsTensor] = + node_contents.GetEvalTensor(kLstmRecurrentToCellWeightsTensor); + kernel_content.internal_tensors[kLstmRecurrentToOutputWeightsTensor] = + node_contents.GetEvalTensor(kLstmRecurrentToOutputWeightsTensor); + kernel_content.internal_tensors[kLstmInputGateBiasTensor] = + node_contents.GetEvalTensor(kLstmInputGateBiasTensor); + kernel_content.internal_tensors[kLstmForgetGateBiasTensor] = + node_contents.GetEvalTensor(kLstmForgetGateBiasTensor); + kernel_content.internal_tensors[kLstmCellGateBiasTensor] = + node_contents.GetEvalTensor(kLstmCellGateBiasTensor); + kernel_content.internal_tensors[kLstmOutputGateBiasTensor] = + node_contents.GetEvalTensor(kLstmOutputGateBiasTensor); + kernel_content.internal_tensors[kLstmOutputStateTensor] = + node_contents.GetEvalTensor(kLstmOutputStateTensor); + kernel_content.internal_tensors[kLstmOutputGateBiasTensor] = + node_contents.GetEvalTensor(kLstmOutputGateBiasTensor); + kernel_content.internal_tensors[kLstmCellStateTensor] = + node_contents.GetEvalTensor(kLstmCellStateTensor); + // Not used internal tensors + kernel_content.internal_tensors[kLstmCellToInputWeightsTensor] = nullptr; + kernel_content.internal_tensors[kLstmCellToForgetWeightsTensor] = nullptr; + kernel_content.internal_tensors[kLstmCellToOutputWeightsTensor] = nullptr; + kernel_content.internal_tensors[kLstmProjectionWeightsTensor] = nullptr; + kernel_content.internal_tensors[kLstmProjectionBiasTensor] = nullptr; + kernel_content.internal_tensors[kLstmInputLayerNormCoefficientsTensor] = + nullptr; + kernel_content.internal_tensors[kLstmForgetLayerNormCoefficientsTensor] = + nullptr; + kernel_content.internal_tensors[kLstmInputLayerNormCoefficientsTensor] = + nullptr; + kernel_content.internal_tensors[kLstmCellLayerNormCoefficientsTensor] = + nullptr; + kernel_content.internal_tensors[kLstmOutputLayerNormCoefficientsTensor] = + nullptr; + // Output tensor + kernel_content.output_tensor = node_contents.OutputEvalTensor(); + return kernel_content; +} + +// Deduce the size information (Batch (B), Time Steps (T), Input dimension (I), +// State dimension (S)) that defines the LSTM using the input and hidden state +// tensor +LstmSizeInfo CreateLstmSizeInfo( + const bool time_major, const TfLiteIntArray* input_tensor_shape, + const TfLiteIntArray* hidden_state_tensor_shape) { + LstmSizeInfo size_info; + size_info.time_major = time_major; + size_info.batch_size = + time_major ? input_tensor_shape->data[1] : input_tensor_shape->data[0]; + size_info.time_steps = + time_major ? input_tensor_shape->data[0] : input_tensor_shape->data[1]; + size_info.input_dimension = input_tensor_shape->data[2]; + size_info.state_dimension = hidden_state_tensor_shape->data[1]; + return size_info; +} + +// Create the LstmOpData using the LstmNodeContent and +// NodeQuantizationParameters (defined in test_data/lstm_test_data) During the +// actual inference phase, OpDataLSTM is created using information from the +// flatbuffer file. 
The test divide the complete LSTM node information into +// LstmNodeContent and NodeQuantizationParameters for easy construction +// purposes +template +OpDataLSTM CreateLstmOpData( + LstmNodeContent& + node_contents) { + const auto& builtin_data = node_contents.BuiltinData(); + const auto& quantization_settings = node_contents.QuantizationSettings(); + OpDataLSTM op_data; + + op_data.cell_gate_nonlinear_type = builtin_data.activation; + op_data.size_info = + CreateLstmSizeInfo(builtin_data.time_major, + node_contents.GetEvalTensor(kLstmInputTensor)->dims, + node_contents.HiddenStateEvalTensor()->dims); + + op_data.cell_state_info = CreateLstmCellStateInfo( + quantization_settings.cell_state.scale, builtin_data.cell_clip); + + // Gate Parameters + op_data.forget_gate_parameters = CreateGateParams( + quantization_settings.input, quantization_settings.hidden_state, + quantization_settings.forget_gate, + quantization_settings.nonlinear_activation_input_scale); + op_data.input_gate_parameters = CreateGateParams( + quantization_settings.input, quantization_settings.hidden_state, + quantization_settings.input_gate, + quantization_settings.nonlinear_activation_input_scale); + op_data.cell_gate_parameters = CreateGateParams( + quantization_settings.input, quantization_settings.hidden_state, + quantization_settings.cell_gate, + quantization_settings.nonlinear_activation_input_scale); + op_data.output_gate_parameters = CreateGateParams( + quantization_settings.input, quantization_settings.hidden_state, + quantization_settings.output_gate, + quantization_settings.nonlinear_activation_input_scale); + // Inter gate multiplication parameters + op_data.inter_gate_parameters.forget_cell_mul_params = + CreateInterGateMulParams( + quantization_settings.nonlinear_activation_output_scale, + quantization_settings.cell_state.scale, + quantization_settings.cell_state.scale); + op_data.inter_gate_parameters.input_mul_params = + CreateInterGateMulParams( + quantization_settings.nonlinear_activation_output_scale, + quantization_settings.nonlinear_activation_output_scale, + quantization_settings.cell_state.scale); + op_data.inter_gate_parameters.output_mul_params = + CreateInterGateMulParams( + quantization_settings.nonlinear_activation_output_scale, + quantization_settings.nonlinear_activation_output_scale, + quantization_settings.hidden_state.scale, + quantization_settings.hidden_state.zero_point); + return op_data; +} + +template +OpDataLSTM CreateLstmOpDataFloat( + LstmNodeContent& node_contents) { + const auto& builtin_data = node_contents.BuiltinData(); + OpDataLSTM op_data; + + op_data.cell_gate_nonlinear_type = builtin_data.activation; + op_data.size_info = + CreateLstmSizeInfo(builtin_data.time_major, + node_contents.GetEvalTensor(kLstmInputTensor)->dims, + node_contents.HiddenStateEvalTensor()->dims); + op_data.cell_state_info.cell_clip = builtin_data.cell_clip; + op_data.cell_state_info.quantized_cell_clip = 0; // No quantization + op_data.cell_state_info.cell_state_scale_power = 0; // No quantization + + // Gate Parameters + op_data.forget_gate_parameters = CreateGateParamsFloat(); + op_data.input_gate_parameters = CreateGateParamsFloat(); + op_data.cell_gate_parameters = CreateGateParamsFloat(); + op_data.output_gate_parameters = CreateGateParamsFloat(); + // Inter gate multiplication parameters + op_data.inter_gate_parameters.forget_cell_mul_params = + CreateInterGateMulParamsFloat(); + op_data.inter_gate_parameters.input_mul_params = + CreateInterGateMulParamsFloat(); + 
op_data.inter_gate_parameters.output_mul_params = + CreateInterGateMulParamsFloat(); + return op_data; +} + +/*Test Functions Below Here*/ +template +void ValidateResultGoldens(const T* golden, const T* output_data, + const int output_len, const float tolerance) { + for (int i = 0; i < output_len; ++i) { + TF_LITE_MICRO_EXPECT_NEAR(golden[i], output_data[i], tolerance); + } +} + +template +void TestCalculateLstmGateFloat(const TfLiteEvalTensor* input, + const TfLiteEvalTensor* input_weight, + const TfLiteEvalTensor* input_bias, + // Recurrent FC + const TfLiteEvalTensor* recurrent, + const TfLiteEvalTensor* recurrent_weight, + const TfLiteEvalTensor* recurrent_bias, + // Result comparison + TfLiteFusedActivation nonlinear_type, + const float* expected_vals, float tolerance) { + float gate_output[batch_size * state_dimension] = {}; + float fc_output_buffer[batch_size * state_dimension] = {}; + + tflite::GateParameters gate_params = CreateGateParamsFloat(); + + // Create step information: only one time step, no need to update + auto size_info = tflite::testing::CreateLstmSizeInfo( + /*time_major*/ false, input->dims, recurrent->dims); + // revise time_major = true to enable batch inference + size_info.time_major = true; + tflite::lstm_internal::LstmStepManager step_info(&size_info); + + tflite::lstm_internal::CalculateLstmGate( + step_info, gate_params, + // Input FC + input, input_weight, input_bias, + // Recurrent FC + recurrent, recurrent_weight, recurrent_bias, + // Output + gate_output, + // Scratch arrays + fc_output_buffer, nonlinear_type); + + ValidateResultGoldens(expected_vals, gate_output, + batch_size * state_dimension, tolerance); +} + +template +void TestCalculateLstmGateInteger( + const TfLiteEvalTensor* input, const TfLiteEvalTensor* input_weight, + const TfLiteEvalTensor* input_bias, + // Recurrent FC + const TfLiteEvalTensor* recurrent, const TfLiteEvalTensor* recurrent_weight, + const TfLiteEvalTensor* recurrent_bias, + // Quantization settings + const NodeQuantizationParameters& node_quantization_settings, + const GateQuantizationParameters& gate_quantization_settings, + // Result comparison + TfLiteFusedActivation nonlinear_type, const float* expected_vals, + float tolerance) { + CellType gate_output[batch_size * state_dimension] = {}; + CellType fc_output_buffer[batch_size * state_dimension] = {}; + + tflite::GateParameters gate_params = CreateGateParams( + node_quantization_settings.input, node_quantization_settings.hidden_state, + gate_quantization_settings, + node_quantization_settings.nonlinear_activation_input_scale); + + // Create step information: only one time step, no need to update + auto size_info = tflite::testing::CreateLstmSizeInfo( + /*time_major*/ false, input->dims, recurrent->dims); + // revise time_major = true to enable batch inference + size_info.time_major = true; + tflite::lstm_internal::LstmStepManager step_info(&size_info); + + // only int8 weight is supported now + tflite::lstm_internal::CalculateLstmGate( + step_info, gate_params, + // Input FC + input, input_weight, input_bias, + // Recurrent FC + recurrent, recurrent_weight, recurrent_bias, + // Output + gate_output, + // Scratch arrays + fc_output_buffer, nonlinear_type); + + float gate_output_float[batch_size * state_dimension] = {}; + Dequantize(gate_output, batch_size * state_dimension, + node_quantization_settings.nonlinear_activation_output_scale, 0, + gate_output_float); + + ValidateResultGoldens(expected_vals, gate_output_float, + batch_size * state_dimension, tolerance); +} + 
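// Editor's sketch (not part of this patch): the gate tests above exercise the
// standard LSTM gate computation gate = activation(W_x * x + W_h * h + b), where
// the two matrix products correspond to the "Input FC" and "Recurrent FC"
// arguments passed to CalculateLstmGate. A minimal float-only illustration with
// hypothetical names, assuming row-major weights and a sigmoid nonlinearity:
#include <cmath>
#include <cstddef>
#include <vector>

std::vector<float> GateReferenceSketch(
    const std::vector<float>& x,    // input,        size input_dim
    const std::vector<float>& h,    // hidden state, size state_dim
    const std::vector<float>& w_x,  // input weights,     size state_dim * input_dim
    const std::vector<float>& w_h,  // recurrent weights, size state_dim * state_dim
    const std::vector<float>& b) {  // bias,              size state_dim
  const std::size_t state_dim = b.size();
  const std::size_t input_dim = x.size();
  std::vector<float> gate(state_dim);
  for (std::size_t s = 0; s < state_dim; ++s) {
    // Accumulate the input FC and recurrent FC contributions plus bias.
    float acc = b[s];
    for (std::size_t i = 0; i < input_dim; ++i) acc += w_x[s * input_dim + i] * x[i];
    for (std::size_t j = 0; j < state_dim; ++j) acc += w_h[s * state_dim + j] * h[j];
    // Sigmoid nonlinearity (the cell gate would use tanh instead).
    gate[s] = 1.0f / (1.0f + std::exp(-acc));
  }
  return gate;
}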
+template +void TestUpdateLstmCellFloat( + const GateOutputCheckData& gate_output_data, + LstmNodeContent& node_content, + const float tolerance) { + float buffer[batch_size * state_dimension] = {}; + + auto forget_cell_mul_params = CreateInterGateMulParamsFloat(); + auto input_mul_params = CreateInterGateMulParamsFloat(); + + auto cell_state = node_content.CellStateEvalTensor(); + // Create step information: only one time step, no need to update + auto size_info = tflite::testing::CreateLstmSizeInfo( + /*time_major*/ false, + node_content.GetEvalTensor(tflite::kLstmInputTensor)->dims, + node_content.HiddenStateEvalTensor()->dims); + // revise time_major = true to enable batch inference + size_info.time_major = true; + tflite::lstm_internal::LstmStepManager step_info(&size_info); + + // copy the data since it will be updated + float forget_gate[batch_size * state_dimension] = {}; + std::memcpy(forget_gate, gate_output_data.expected_forget_gate_output, + batch_size * state_dimension * sizeof(float)); + + CellStateInfo cell_state_info; + cell_state_info.cell_clip = node_content.BuiltinData().cell_clip; + // Call the function to be tested + tflite::lstm_internal::UpdateLstmCell( + step_info, cell_state, forget_gate, + gate_output_data.expected_input_gate_output, + gate_output_data.expected_cell_gate_output, forget_cell_mul_params, + input_mul_params, cell_state_info, buffer); + + ValidateResultGoldens(gate_output_data.expected_updated_cell, + tflite::micro::GetTensorData(cell_state), + batch_size * state_dimension, tolerance); +} + +template +void TestUpdateLstmCellInteger( + const GateOutputCheckData& gate_output_data, + LstmNodeContent& node_content, + const float tolerance) { + const auto& quantization_settings = node_content.QuantizationSettings(); + CellType quantized_forget_gate[batch_size * state_dimension] = {}; + tflite::Quantize(gate_output_data.expected_forget_gate_output, + quantized_forget_gate, batch_size * state_dimension, + quantization_settings.nonlinear_activation_output_scale, 0); + + CellType quantized_input_gate[batch_size * state_dimension] = {}; + tflite::Quantize(gate_output_data.expected_input_gate_output, + quantized_input_gate, batch_size * state_dimension, + quantization_settings.nonlinear_activation_output_scale, 0); + + CellType quantized_cell_gate[batch_size * state_dimension] = {}; + tflite::Quantize(gate_output_data.expected_cell_gate_output, + quantized_cell_gate, batch_size * state_dimension, + quantization_settings.nonlinear_activation_output_scale, 0); + + CellType buffer[batch_size * state_dimension] = {}; + + auto forget_cell_mul_params = CreateInterGateMulParams( + quantization_settings.nonlinear_activation_output_scale, + quantization_settings.cell_state.scale, + quantization_settings.cell_state.scale); + auto input_mul_params = CreateInterGateMulParams( + quantization_settings.nonlinear_activation_output_scale, + quantization_settings.nonlinear_activation_output_scale, + quantization_settings.cell_state.scale); + + auto cell_state_info = + CreateLstmCellStateInfo(quantization_settings.cell_state.scale, + node_content.BuiltinData().cell_clip); + + auto cell_state = node_content.CellStateEvalTensor(); + // Create step information: only one time step, no need to update + auto size_info = tflite::testing::CreateLstmSizeInfo( + /*time_major*/ false, + node_content.GetEvalTensor(tflite::kLstmInputTensor)->dims, + node_content.HiddenStateEvalTensor()->dims); + // revise time_major = true to enable batch inference + size_info.time_major = true; + 
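// (Editor's note: UpdateLstmCell is expected to apply the standard cell update
//  c_t = f_t * c_{t-1} + i_t * g_t element-wise, clipped to +/- cell_clip when a
//  clip value is configured; the golden comparison below checks that result.)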
tflite::lstm_internal::LstmStepManager step_info(&size_info); + + // Call the function to be tested + tflite::lstm_internal::UpdateLstmCell( + step_info, cell_state, quantized_forget_gate, quantized_input_gate, + quantized_cell_gate, forget_cell_mul_params, input_mul_params, + cell_state_info, buffer); + + float cell_state_float[batch_size * state_dimension] = {}; + Dequantize(tflite::micro::GetTensorData(cell_state), + batch_size * state_dimension, + quantization_settings.cell_state.scale, + quantization_settings.cell_state.zero_point, cell_state_float); + + ValidateResultGoldens(gate_output_data.expected_updated_cell, + cell_state_float, batch_size * state_dimension, + tolerance); +} + +template +void TestUpdateLstmHiddenFloat( + const GateOutputCheckData& gate_output_data, + LstmNodeContent& node_content, + const float tolerance) { + float buffer[batch_size * state_dimension] = {}; + + auto mul_params = CreateInterGateMulParamsFloat(); + + int32_t cell_state_scale_power = 0; + + // Create step information: only one time step, no need to update + auto size_info = tflite::testing::CreateLstmSizeInfo( + /*time_major*/ false, + node_content.GetEvalTensor(tflite::kLstmInputTensor)->dims, + node_content.HiddenStateEvalTensor()->dims); + // revise time_major = true to enable batch inference + size_info.time_major = true; + tflite::lstm_internal::LstmStepManager step_info(&size_info); + + auto cell_state = node_content.CellStateEvalTensor(); + auto hidden_state = node_content.HiddenStateEvalTensor(); + + tflite::lstm_internal::UpdateLstmHidden( + step_info, cell_state, hidden_state, + gate_output_data.expected_output_gate_output, mul_params, + cell_state_scale_power, buffer); + + ValidateResultGoldens(gate_output_data.expected_updated_hidden, + tflite::micro::GetTensorData(hidden_state), + batch_size * state_dimension, tolerance); +} + +template +void TestUpdateLstmHiddenInteger( + const GateOutputCheckData& gate_output_data, + LstmNodeContent& node_content, + const float tolerance) { + const auto& quantization_settings = node_content.QuantizationSettings(); + CellType quantized_output_gate[batch_size * state_dimension] = {}; + tflite::Quantize(gate_output_data.expected_output_gate_output, + quantized_output_gate, batch_size * state_dimension, + quantization_settings.nonlinear_activation_output_scale, 0); + + CellType buffer[batch_size * state_dimension] = {}; + + auto mul_params = CreateInterGateMulParams( + quantization_settings.nonlinear_activation_output_scale, + quantization_settings.nonlinear_activation_output_scale, + quantization_settings.hidden_state.scale, + quantization_settings.hidden_state.zero_point); + + int cell_state_scale_power_buffer; + tflite::CheckedLog2(quantization_settings.cell_state.scale, + &cell_state_scale_power_buffer); + int32_t cell_state_scale_power = cell_state_scale_power_buffer; + + // Create step information: only one time step, no need to update + auto size_info = tflite::testing::CreateLstmSizeInfo( + /*time_major*/ false, + node_content.GetEvalTensor(tflite::kLstmInputTensor)->dims, + node_content.HiddenStateEvalTensor()->dims); + // revise time_major = true to enable batch inference + size_info.time_major = true; + tflite::lstm_internal::LstmStepManager step_info(&size_info); + + auto cell_state = node_content.CellStateEvalTensor(); + auto hidden_state = node_content.HiddenStateEvalTensor(); + + tflite::lstm_internal::UpdateLstmHidden( + step_info, cell_state, hidden_state, quantized_output_gate, mul_params, + cell_state_scale_power, buffer); + + float 
hidden_state_float[batch_size * state_dimension] = {}; + Dequantize(tflite::micro::GetTensorData(hidden_state), + batch_size * state_dimension, + quantization_settings.hidden_state.scale, + quantization_settings.hidden_state.zero_point, hidden_state_float); + + ValidateResultGoldens(gate_output_data.expected_updated_hidden, + hidden_state_float, batch_size * state_dimension, + tolerance); +} + +template +void TestLstmStepFloat( + const GateOutputCheckData& gate_output_data, + const float hidden_state_tolerance, const float cell_state_tolerance, + /*can not be const, state will be updated*/ + LstmNodeContent& node_contents) { + // Mimicking the kernel preparation phase, node_contents approximate the + LSTMKernelContents kernel_content = CreateLSTMKernelContent(node_contents); + LSTMBuffers buffers; + // Scratch buffers on the stack + float buffer0[batch_size * state_dimension] = {}; + buffers.buffer0 = buffer0; + float buffer1[batch_size * state_dimension] = {}; + buffers.buffer1 = buffer1; + float buffer2[batch_size * state_dimension] = {}; + buffers.buffer2 = buffer2; + float buffer3[batch_size * state_dimension] = {}; + buffers.buffer3 = buffer3; + + OpDataLSTM op_data = CreateLstmOpDataFloat(node_contents); + // set time_major to true to test batch inference + op_data.size_info.time_major = true; + tflite::lstm_internal::LstmStepManager step_info(&op_data.size_info); + tflite::lstm_internal::LstmStep( + step_info, op_data, kernel_content, buffers); + + ValidateResultGoldens( + gate_output_data.expected_updated_hidden, + tflite::micro::GetTensorData(kernel_content.HiddenStateTensor()), + batch_size * state_dimension, hidden_state_tolerance); + ValidateResultGoldens( + gate_output_data.expected_updated_cell, + tflite::micro::GetTensorData(kernel_content.CellStateTensor()), + batch_size * state_dimension, cell_state_tolerance); +} + +template +void TestLstmStepInteger( + const GateOutputCheckData& gate_output_data, + const float hidden_state_tolerance, const float cell_state_tolerance, + /*can not be const, state will be updated*/ + LstmNodeContent& + node_contents) { + // Mimicking the kernel preparation phase, node_contents approximate the + LSTMKernelContents kernel_content = CreateLSTMKernelContent(node_contents); + LSTMBuffers buffers; + + // Scratch buffers on the stack + CellType buffer0[batch_size * state_dimension] = {}; + buffers.buffer0 = buffer0; + CellType buffer1[batch_size * state_dimension] = {}; + buffers.buffer1 = buffer1; + CellType buffer2[batch_size * state_dimension] = {}; + buffers.buffer2 = buffer2; + CellType buffer3[batch_size * state_dimension] = {}; + buffers.buffer3 = buffer3; + + OpDataLSTM op_data = CreateLstmOpData(node_contents); + // set time_major to true to test batch inference + op_data.size_info.time_major = true; + tflite::lstm_internal::LstmStepManager step_info(&op_data.size_info); + tflite::lstm_internal::LstmStep(step_info, op_data, kernel_content, + buffers); + + const auto& quantization_settings = node_contents.QuantizationSettings(); + float dequantized_hidden_state[batch_size * state_dimension] = {}; + Dequantize( + tflite::micro::GetTensorData( + kernel_content.HiddenStateTensor()), + batch_size * state_dimension, quantization_settings.hidden_state.scale, + quantization_settings.hidden_state.zero_point, dequantized_hidden_state); + + float dequantized_cell_state[batch_size * state_dimension] = {}; + Dequantize( + tflite::micro::GetTensorData(kernel_content.CellStateTensor()), + batch_size * state_dimension, 
quantization_settings.cell_state.scale, + quantization_settings.cell_state.zero_point, dequantized_cell_state); + + ValidateResultGoldens(gate_output_data.expected_updated_hidden, + dequantized_hidden_state, batch_size * state_dimension, + hidden_state_tolerance); + ValidateResultGoldens(gate_output_data.expected_updated_cell, + dequantized_cell_state, batch_size * state_dimension, + cell_state_tolerance); +} + +template +void TestEvalLstmFloat( + const LstmEvalCheckData< + batch_size * time_steps * input_dimension, batch_size * state_dimension, + batch_size * state_dimension * time_steps>& eval_check_data, + const float hidden_state_tolerance, const float cell_state_tolerance, + LstmNodeContent& node_contents) { + // Mimicking the kernel preparation phase, node_contents approximate the node + LSTMKernelContents kernel_content = CreateLSTMKernelContent(node_contents); + // Scratch buffers on the stack + LSTMBuffers buffers; + float buffer0[batch_size * state_dimension] = {}; + buffers.buffer0 = buffer0; + float buffer1[batch_size * state_dimension] = {}; + buffers.buffer1 = buffer1; + float buffer2[batch_size * state_dimension] = {}; + buffers.buffer2 = buffer2; + float buffer3[batch_size * state_dimension] = {}; + buffers.buffer3 = buffer3; + + OpDataLSTM op_data = CreateLstmOpDataFloat(node_contents); + + tflite::EvalLstm(op_data, kernel_content, + buffers); + + ValidateResultGoldens(eval_check_data.expected_hidden_state, + node_contents.GetHiddenStateData(), + batch_size * state_dimension, hidden_state_tolerance); + + ValidateResultGoldens(eval_check_data.expected_cell_state, + node_contents.GetCellStateData(), + batch_size * state_dimension, cell_state_tolerance); + + ValidateResultGoldens(eval_check_data.expected_output, + node_contents.GetOutputData(), + batch_size * state_dimension, hidden_state_tolerance); +} + +template +void TestEvalLstmInteger( + const LstmEvalCheckData< + batch_size * time_steps * input_dimension, batch_size * state_dimension, + batch_size * state_dimension * time_steps>& eval_check_data, + const float hidden_state_tolerance, const float cell_state_tolerance, + LstmNodeContent& + node_contents) { + // Mimicking the kernel preparation phase, node_contents approximate the node + LSTMKernelContents kernel_content = CreateLSTMKernelContent(node_contents); + // Scratch buffers on the stack + LSTMBuffers buffers; + CellType buffer0[batch_size * state_dimension] = {}; + buffers.buffer0 = buffer0; + CellType buffer1[batch_size * state_dimension] = {}; + buffers.buffer1 = buffer1; + CellType buffer2[batch_size * state_dimension] = {}; + buffers.buffer2 = buffer2; + CellType buffer3[batch_size * state_dimension] = {}; + buffers.buffer3 = buffer3; + + OpDataLSTM op_data = CreateLstmOpData(node_contents); + + tflite::EvalLstm( + op_data, kernel_content, buffers); + + const auto& quantization_settings = node_contents.QuantizationSettings(); + float dequantized_hidden_state[batch_size * state_dimension] = {}; + Dequantize(node_contents.GetHiddenStateData(), batch_size * state_dimension, + quantization_settings.hidden_state.scale, + quantization_settings.hidden_state.zero_point, + dequantized_hidden_state); + + ValidateResultGoldens(eval_check_data.expected_hidden_state, + dequantized_hidden_state, batch_size * state_dimension, + hidden_state_tolerance); + + float dequantized_cell_state[batch_size * state_dimension] = {}; + Dequantize(node_contents.GetCellStateData(), batch_size * state_dimension, + quantization_settings.cell_state.scale, + 
quantization_settings.cell_state.zero_point, + dequantized_cell_state); + ValidateResultGoldens(eval_check_data.expected_cell_state, + dequantized_cell_state, batch_size * state_dimension, + cell_state_tolerance); + + float dequantized_output[batch_size * state_dimension * time_steps] = {}; + Dequantize(node_contents.GetOutputData(), + batch_size * state_dimension * time_steps, + quantization_settings.output.scale, + quantization_settings.output.zero_point, dequantized_output); + ValidateResultGoldens(eval_check_data.expected_output, dequantized_output, + batch_size * state_dimension, hidden_state_tolerance); +} + +} // namespace testing +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_TEST_H_ diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_shared.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_shared.h index ee34b8489..dbdc3c553 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_shared.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_shared.h @@ -15,6 +15,9 @@ limitations under the License. #ifndef TENSORFLOW_LITE_MICRO_KERNELS_LSTM_SHARED_H_ #define TENSORFLOW_LITE_MICRO_KERNELS_LSTM_SHARED_H_ +#include "tensorflow/lite/c/builtin_op_data.h" +#include "tensorflow/lite/kernels/internal/types.h" + namespace tflite { // Input Tensors of size {n_batch, n_input} @@ -63,5 +66,85 @@ constexpr int kLstmOutputLayerNormCoefficientsTensor = 23; // Optional // Output tensors. constexpr int kLstmOutputTensor = 0; +// Parameters for the two fully conncted computation inside each gate +struct GateParameters { + FullyConnectedParams input_fc_params; + FullyConnectedParams recurrent_fc_params; +}; + +// Paramaters for the element wise multiplications between gate outputs +struct InterGateParameters { + ArithmeticParams forget_cell_mul_params; + ArithmeticParams input_mul_params; + ArithmeticParams output_mul_params; +}; + +// Size information about the LSTM kernel, which is deduced from tensors stored +// in the flat buffer file. +struct LstmSizeInfo { + bool time_major; + int batch_size; + int time_steps; + int input_dimension; + int state_dimension; +}; + +// Contains information about the cell state tensor +struct CellStateInfo { + float cell_clip; + // clipping range for cell state only 16 bits cell is supported (could be + // generalized through templatation) + int16_t quantized_cell_clip; + // 2^-cell_state_scale_power = cell state scale, required by integer tanh + // computation + int32_t cell_state_scale_power; +}; + +// Contains required computation information for LSTM kernel evaluation. +// Specifically, it includes shape and quantization settings for the LSTM +// internal operations. Formatted to support operations defined in the +// tensorflow/lite/kernels/internal/reference/integer_ops +// Should be constructed during the preparation phase +struct OpDataLSTM { + LstmSizeInfo size_info; + CellStateInfo cell_state_info; + TfLiteFusedActivation cell_gate_nonlinear_type; + GateParameters forget_gate_parameters; + GateParameters input_gate_parameters; + GateParameters cell_gate_parameters; + GateParameters output_gate_parameters; + InterGateParameters inter_gate_parameters; + int buffer_indices[4]; // TFLM only +}; + +// Provide an interface to access the internal tensors and buffers used for LSTM +// invocation. Constructed during the invocation phase +struct LSTMKernelContents { + public: + // Internal tensors, fixed (const). 
see lstm_shared.h for tensor names + const TfLiteEvalTensor* GetInternalTensor(const int tensor_index) const { + return internal_tensors[tensor_index]; + } + // Variable tensors (will be changed, can not be const) + TfLiteEvalTensor* HiddenStateTensor() { + return internal_tensors[kLstmOutputStateTensor]; + } + TfLiteEvalTensor* CellStateTensor() { + return internal_tensors[kLstmCellStateTensor]; + } + // Node internal tensors with indexes defined at the beginning of the file + TfLiteEvalTensor* internal_tensors[24]; + TfLiteEvalTensor* output_tensor; +}; + +template +struct LSTMBuffers { + // TFLM buffers requires buffer index from LstmOpData. + CellType* buffer0; + CellType* buffer1; + CellType* buffer2; + CellType* buffer3; +}; + } // namespace tflite #endif // TENSORFLOW_LITE_MICRO_KERNELS_LSTM_SHARED_H_ diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/maximum_minimum.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/maximum_minimum.cc index 1aebdefdc..b7b9cba8f 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/maximum_minimum.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/maximum_minimum.cc @@ -1,4 +1,4 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -26,9 +26,7 @@ limitations under the License. #include "tensorflow/lite/micro/micro_log.h" namespace tflite { -namespace ops { -namespace micro { -namespace maximum_minimum { + namespace { // This file has a reference implementation of TFMaximum/TFMinimum. @@ -65,8 +63,6 @@ struct MinimumOp { } }; -} // namespace - template void TFLiteOperation(TfLiteContext* context, TfLiteNode* node, const OpContext& op_context) { @@ -111,22 +107,16 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -} // namespace maximum_minimum +} // namespace TfLiteRegistration Register_MAXIMUM() { - return tflite::micro::RegisterOp( - nullptr, nullptr, - maximum_minimum::Eval); + return tflite::micro::RegisterOp(nullptr, nullptr, + Eval); } TfLiteRegistration Register_MINIMUM() { - return tflite::micro::RegisterOp( - nullptr, nullptr, - maximum_minimum::Eval); + return tflite::micro::RegisterOp(nullptr, nullptr, + Eval); } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/micro_ops.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/micro_ops.h index df2a8d2c3..252efc629 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/micro_ops.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/micro_ops.h @@ -1,4 +1,4 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -42,55 +42,80 @@ TfLiteRegistration Register_BROADCAST_ARGS(); TfLiteRegistration Register_BROADCAST_TO(); TfLiteRegistration Register_CALL_ONCE(); TfLiteRegistration Register_CAST(); +TfLiteRegistration Register_CEIL(); // TODO(b/160234179): Change custom OPs to also return by value. 
TfLiteRegistration* Register_CIRCULAR_BUFFER(); +TfLiteRegistration Register_CONCATENATION(); +TfLiteRegistration Register_CONV_2D(); TfLiteRegistration Register_CUMSUM(); TfLiteRegistration Register_DEPTH_TO_SPACE(); TfLiteRegistration Register_DEPTHWISE_CONV_2D(); TfLiteRegistration Register_DEQUANTIZE(); TfLiteRegistration Register_DIV(); TfLiteRegistration Register_ELU(); +TfLiteRegistration Register_EQUAL(); +TfLiteRegistration* Register_ETHOSU(); TfLiteRegistration Register_EXP(); TfLiteRegistration Register_EXPAND_DIMS(); TfLiteRegistration Register_FILL(); +TfLiteRegistration Register_FLOOR(); TfLiteRegistration Register_FLOOR_DIV(); TfLiteRegistration Register_FLOOR_MOD(); +TfLiteRegistration Register_FULLY_CONNECTED(); TfLiteRegistration Register_GATHER(); TfLiteRegistration Register_GATHER_ND(); +TfLiteRegistration Register_GREATER(); +TfLiteRegistration Register_GREATER_EQUAL(); TfLiteRegistration Register_HARD_SWISH(); TfLiteRegistration Register_IF(); +TfLiteRegistration Register_L2_NORMALIZATION(); TfLiteRegistration Register_L2_POOL_2D(); TfLiteRegistration Register_LEAKY_RELU(); +TfLiteRegistration Register_LESS(); +TfLiteRegistration Register_LESS_EQUAL(); TfLiteRegistration Register_LOG_SOFTMAX(); TfLiteRegistration Register_LOGICAL_AND(); TfLiteRegistration Register_LOGICAL_OR(); TfLiteRegistration Register_LOGISTIC(); TfLiteRegistration Register_MAX_POOL_2D(); +TfLiteRegistration Register_MAXIMUM(); +TfLiteRegistration Register_MEAN(); +TfLiteRegistration Register_MINIMUM(); TfLiteRegistration Register_MIRROR_PAD(); -TfLiteRegistration Register_NEG(); -TfLiteRegistration Register_PRELU(); TfLiteRegistration Register_MUL(); +TfLiteRegistration Register_NEG(); +TfLiteRegistration Register_NOT_EQUAL(); +TfLiteRegistration Register_PACK(); TfLiteRegistration Register_PAD(); TfLiteRegistration Register_PADV2(); +TfLiteRegistration Register_PRELU(); TfLiteRegistration Register_QUANTIZE(); TfLiteRegistration Register_READ_VARIABLE(); +TfLiteRegistration Register_REDUCE_MAX(); TfLiteRegistration Register_RELU(); TfLiteRegistration Register_RELU6(); TfLiteRegistration Register_RESIZE_BILINEAR(); +TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR(); TfLiteRegistration Register_SELECT_V2(); TfLiteRegistration Register_SHAPE(); TfLiteRegistration Register_SLICE(); +TfLiteRegistration Register_SOFTMAX(); TfLiteRegistration Register_SPACE_TO_BATCH_ND(); TfLiteRegistration Register_SPACE_TO_DEPTH(); +TfLiteRegistration Register_SPLIT(); +TfLiteRegistration Register_SPLIT_V(); TfLiteRegistration Register_SQUARED_DIFFERENCE(); TfLiteRegistration Register_SQUEEZE(); +TfLiteRegistration Register_STRIDED_SLICE(); TfLiteRegistration Register_SUB(); TfLiteRegistration Register_SUM(); TfLiteRegistration Register_SVDF(); +TfLiteRegistration Register_TANH(); TfLiteRegistration Register_TRANSPOSE(); TfLiteRegistration Register_TRANSPOSE_CONV(); // TODO(b/230666079): resolve conflict with xtensa implementation TfLiteRegistration Register_UNIDIRECTIONAL_SEQUENCE_LSTM(); +TfLiteRegistration Register_UNPACK(); TfLiteRegistration Register_VAR_HANDLE(); TfLiteRegistration Register_WHILE(); TfLiteRegistration Register_ZEROS_LIKE(); @@ -99,35 +124,15 @@ namespace ops { namespace micro { TfLiteRegistration Register_ABS(); -TfLiteRegistration Register_CEIL(); -TfLiteRegistration Register_CONCATENATION(); TfLiteRegistration Register_COS(); -TfLiteRegistration Register_EQUAL(); -TfLiteRegistration Register_FLOOR(); -TfLiteRegistration Register_GREATER(); -TfLiteRegistration Register_GREATER_EQUAL(); 
-TfLiteRegistration Register_LESS(); -TfLiteRegistration Register_LESS_EQUAL(); TfLiteRegistration Register_LOG(); TfLiteRegistration Register_LOGICAL_NOT(); -TfLiteRegistration Register_MAXIMUM(); -TfLiteRegistration Register_MINIMUM(); -TfLiteRegistration Register_NOT_EQUAL(); -TfLiteRegistration Register_PACK(); TfLiteRegistration Register_RESHAPE(); -TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR(); TfLiteRegistration Register_ROUND(); TfLiteRegistration Register_RSQRT(); TfLiteRegistration Register_SIN(); -TfLiteRegistration Register_SPLIT(); -TfLiteRegistration Register_SPLIT_V(); TfLiteRegistration Register_SQRT(); TfLiteRegistration Register_SQUARE(); -TfLiteRegistration Register_STRIDED_SLICE(); -TfLiteRegistration Register_UNPACK(); -TfLiteRegistration Register_L2_NORMALIZATION(); -TfLiteRegistration Register_TANH(); - } // namespace micro } // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/mul_common.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/mul_common.cc index dd5dfc40c..45e7c1e4c 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/mul_common.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/mul_common.cc @@ -1,4 +1,4 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -54,7 +54,7 @@ TfLiteStatus CalculateOpDataMul(TfLiteContext* context, TfLiteNode* node, TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type); - if (output->type == kTfLiteInt8) { + if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) { TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( context, params->activation, output, &data->output_activation_min, &data->output_activation_max)); @@ -68,6 +68,12 @@ TfLiteStatus CalculateOpDataMul(TfLiteContext* context, TfLiteNode* node, data->input1_zero_point = input1->params.zero_point; data->input2_zero_point = input2->params.zero_point; data->output_zero_point = output->params.zero_point; + + if (input1->type == kTfLiteInt16) { + TF_LITE_ENSURE_EQ(context, data->input1_zero_point, 0); + TF_LITE_ENSURE_EQ(context, data->input2_zero_point, 0); + TF_LITE_ENSURE_EQ(context, data->output_zero_point, 0); + } } else if (output->type == kTfLiteInt32) { CalculateActivationRange(params->activation, &data->output_activation_min, &data->output_activation_max); @@ -148,9 +154,9 @@ TfLiteStatus EvalMulQuantizedReference(TfLiteContext* context, TfLiteNode* node, tflite::micro::GetTensorData(output)); } } else if (input1->type == kTfLiteInt16) { - TF_LITE_ENSURE_EQ(context, op_params.input1_offset, 0.0); - TF_LITE_ENSURE_EQ(context, op_params.input2_offset, 0.0); - TF_LITE_ENSURE_EQ(context, op_params.output_offset, 0.0); + TF_LITE_ENSURE_EQ(context, op_params.input1_offset, 0); + TF_LITE_ENSURE_EQ(context, op_params.input2_offset, 0); + TF_LITE_ENSURE_EQ(context, op_params.output_offset, 0); if (need_broadcast) { reference_integer_ops::BroadcastMul4DSlow( diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pack.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pack.cc index 5e322b87b..5a4eb4f53 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pack.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pack.cc @@ -1,4 +1,4 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -20,9 +20,7 @@ limitations under the License. #include "tensorflow/lite/micro/micro_log.h" namespace tflite { -namespace ops { -namespace micro { -namespace pack { + namespace { constexpr int kOutputTensor = 0; @@ -106,12 +104,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } } // namespace -} // namespace pack TfLiteRegistration Register_PACK() { - return tflite::micro::RegisterOp(nullptr, nullptr, pack::Eval); + return tflite::micro::RegisterOp(nullptr, nullptr, Eval); } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.cc index d9b147ad8..050913c5a 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.cc @@ -1,4 +1,4 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -43,7 +43,12 @@ TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) { AveragePoolingEvalFloat(context, node, params, data, input, output); break; case kTfLiteInt8: - AveragePoolingEvalQuantized(context, node, params, data, input, output); + AveragePoolingEvalQuantized(context, node, params, data, input, + output); + break; + case kTfLiteInt16: + AveragePoolingEvalQuantized(context, node, params, data, input, + output); break; default: MicroPrintf("Input type %s is not currently supported", @@ -71,7 +76,12 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) { MaxPoolingEvalFloat(context, node, params, data, input, output); break; case kTfLiteInt8: - MaxPoolingEvalQuantized(context, node, params, data, input, output); + MaxPoolingEvalQuantized(context, node, params, data, input, + output); + break; + case kTfLiteInt16: + MaxPoolingEvalQuantized(context, node, params, data, input, + output); break; default: MicroPrintf("Type %s not currently supported.", diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.h index 493250ee1..5298b271f 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.h @@ -1,4 +1,4 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -20,7 +20,14 @@ limitations under the License. 
#include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h" +#include "tensorflow/lite/kernels/internal/reference/pooling.h" +#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/kernels/padding.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" #include "tensorflow/lite/micro/kernels/micro_ops.h" +#include "tensorflow/lite/micro/micro_log.h" namespace tflite { @@ -50,27 +57,69 @@ void AveragePoolingEvalFloat(const TfLiteContext* context, const TfLiteEvalTensor* input, TfLiteEvalTensor* output); +template void AveragePoolingEvalQuantized(TfLiteContext* context, const TfLiteNode* node, const TfLitePoolParams* params, const OpDataPooling* data, const TfLiteEvalTensor* input, - TfLiteEvalTensor* output); + TfLiteEvalTensor* output) { + TFLITE_DCHECK(input->type == kTfLiteInt8 || input->type == kTfLiteInt16); + + PoolParams op_params; + op_params.stride_height = params->stride_height; + op_params.stride_width = params->stride_width; + op_params.filter_height = params->filter_height; + op_params.filter_width = params->filter_width; + op_params.padding_values.height = data->padding.height; + op_params.padding_values.width = data->padding.width; + op_params.quantized_activation_min = data->activation_min; + op_params.quantized_activation_max = data->activation_max; + + reference_integer_ops::AveragePool(op_params, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); +} void MaxPoolingEvalFloat(TfLiteContext* context, TfLiteNode* node, TfLitePoolParams* params, const OpDataPooling* data, const TfLiteEvalTensor* input, TfLiteEvalTensor* output); +template void MaxPoolingEvalQuantized(TfLiteContext* context, TfLiteNode* node, TfLitePoolParams* params, const OpDataPooling* data, const TfLiteEvalTensor* input, - TfLiteEvalTensor* output); + TfLiteEvalTensor* output) { + TFLITE_DCHECK(input->type == kTfLiteInt8 || input->type == kTfLiteInt16); + + tflite::PoolParams op_params; + op_params.stride_height = params->stride_height; + op_params.stride_width = params->stride_width; + op_params.filter_height = params->filter_height; + op_params.filter_width = params->filter_width; + op_params.padding_values.height = data->padding.height; + op_params.padding_values.width = data->padding.width; + op_params.quantized_activation_min = data->activation_min; + op_params.quantized_activation_max = data->activation_max; + + reference_integer_ops::MaxPool(op_params, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); +} -#if defined(CMSIS_NN) +#if defined(CMSIS_NN) || defined(XTENSA) TfLiteRegistration Register_AVERAGE_POOL_2D_INT8(); TfLiteRegistration Register_MAX_POOL_2D_INT8(); + +TfLiteRegistration Register_AVERAGE_POOL_2D_INT16(); + +TfLiteRegistration Register_MAX_POOL_2D_INT16(); #else inline TfLiteRegistration Register_AVERAGE_POOL_2D_INT8() { return tflite::Register_AVERAGE_POOL_2D(); @@ -79,6 +128,14 @@ inline TfLiteRegistration Register_AVERAGE_POOL_2D_INT8() { inline TfLiteRegistration Register_MAX_POOL_2D_INT8() { return tflite::Register_MAX_POOL_2D(); } + +inline TfLiteRegistration Register_AVERAGE_POOL_2D_INT16() { + return tflite::Register_AVERAGE_POOL_2D(); +} + +inline TfLiteRegistration 
Register_MAX_POOL_2D_INT16() { + return tflite::Register_MAX_POOL_2D(); +} #endif } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling_common.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling_common.cc index ddc18f0bb..b39e9d846 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling_common.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling_common.cc @@ -1,4 +1,4 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -69,10 +69,14 @@ TfLiteStatus PoolingPrepare(TfLiteContext* context, TfLiteNode* node) { if (input->type == kTfLiteFloat32) { CalculateActivationRange(params->activation, &data->activation_min_f32, &data->activation_max_f32); - } else if (input->type == kTfLiteInt8) { + } else if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) { CalculateActivationRangeQuantized(context, params->activation, output, &data->activation_min, &data->activation_max); + } else { + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; } micro_context->DeallocateTempTfLiteTensor(input); @@ -102,30 +106,6 @@ void AveragePoolingEvalFloat(const TfLiteContext* context, tflite::micro::GetTensorData(output)); } -void AveragePoolingEvalQuantized(TfLiteContext* context, const TfLiteNode* node, - const TfLitePoolParams* params, - const OpDataPooling* data, - const TfLiteEvalTensor* input, - TfLiteEvalTensor* output) { - TFLITE_DCHECK(input->type == kTfLiteInt8); - - PoolParams op_params; - op_params.stride_height = params->stride_height; - op_params.stride_width = params->stride_width; - op_params.filter_height = params->filter_height; - op_params.filter_width = params->filter_width; - op_params.padding_values.height = data->padding.height; - op_params.padding_values.width = data->padding.width; - op_params.quantized_activation_min = data->activation_min; - op_params.quantized_activation_max = data->activation_max; - - reference_integer_ops::AveragePool( - op_params, tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); -} - void MaxPoolingEvalFloat(TfLiteContext* context, TfLiteNode* node, TfLitePoolParams* params, const OpDataPooling* data, const TfLiteEvalTensor* input, @@ -145,26 +125,4 @@ void MaxPoolingEvalFloat(TfLiteContext* context, TfLiteNode* node, tflite::micro::GetTensorData(output)); } -void MaxPoolingEvalQuantized(TfLiteContext* context, TfLiteNode* node, - TfLitePoolParams* params, - const OpDataPooling* data, - const TfLiteEvalTensor* input, - TfLiteEvalTensor* output) { - tflite::PoolParams op_params; - op_params.stride_height = params->stride_height; - op_params.stride_width = params->stride_width; - op_params.filter_height = params->filter_height; - op_params.filter_width = params->filter_width; - op_params.padding_values.height = data->padding.height; - op_params.padding_values.width = data->padding.width; - op_params.quantized_activation_min = data->activation_min; - op_params.quantized_activation_max = data->activation_max; - - reference_integer_ops::MaxPool(op_params, - tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(output), - 
tflite::micro::GetTensorData(output)); -} - } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/reshape.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/reshape.cc index 832ba2612..0c6806d12 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/reshape.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/reshape.cc @@ -1,4 +1,4 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -68,6 +68,11 @@ TfLiteStatus ReshapeOutput(TfLiteContext* context, TfLiteNode* node) { } } if (stretch_dim != -1) { + TfLiteEvalTensor* output_eval = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + TF_LITE_ENSURE_STATUS(tflite::micro::CreateWritableTensorDimsWithCopy( + context, output, output_eval)); + output_shape = output->dims; // output tensor dims were moved output_shape->data[stretch_dim] = num_input_elements / num_output_elements; num_output_elements *= output_shape->data[stretch_dim]; } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/resize_nearest_neighbor.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/resize_nearest_neighbor.cc index 756cf03fa..4ed09d00a 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/resize_nearest_neighbor.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/resize_nearest_neighbor.cc @@ -1,4 +1,4 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -24,9 +24,8 @@ limitations under the License. #include "tensorflow/lite/micro/micro_log.h" namespace tflite { -namespace ops { -namespace micro { -namespace resize_nearest_neighbor { + +namespace { constexpr int kInputTensor = 0; constexpr int kSizeTensor = 1; @@ -114,13 +113,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -} // namespace resize_nearest_neighbor + +} // namespace TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR() { - return tflite::micro::RegisterOp(nullptr, resize_nearest_neighbor::Prepare, - resize_nearest_neighbor::Eval); + return tflite::micro::RegisterOp(nullptr, Prepare, Eval); } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/split.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/split.cc index 4ff748562..226e4bf78 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/split.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/split.cc @@ -1,4 +1,4 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -21,9 +21,8 @@ limitations under the License. 
#include "tensorflow/lite/micro/micro_log.h" namespace tflite { -namespace ops { -namespace micro { -namespace split { + +namespace { template TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node, @@ -117,12 +116,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -} // namespace split +} // namespace TfLiteRegistration Register_SPLIT() { - return tflite::micro::RegisterOp(nullptr, split::Prepare, split::Eval); + return tflite::micro::RegisterOp(nullptr, Prepare, Eval); } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/split_v.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/split_v.cc index d0002d57c..236536f15 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/split_v.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/split_v.cc @@ -1,4 +1,4 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -22,9 +22,8 @@ limitations under the License. #include "tensorflow/lite/micro/micro_log.h" namespace tflite { -namespace ops { -namespace micro { -namespace split_v { + +namespace { template TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node, @@ -119,12 +118,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -} // namespace split_v +} // namespace TfLiteRegistration Register_SPLIT_V() { - return tflite::micro::RegisterOp(nullptr, split_v::Prepare, split_v::Eval); + return tflite::micro::RegisterOp(nullptr, Prepare, Eval); } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/squared_difference.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/squared_difference.cc index 8786a8715..6b21a097f 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/squared_difference.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/squared_difference.cc @@ -44,6 +44,44 @@ void* SquaredDifferenceInit(TfLiteContext* context, const char* buffer, return context->AllocatePersistentBuffer(context, sizeof(OpData)); } +void PrepareQuantized( + const TfLiteQuantizationParams& input1_quantization_params, + const TfLiteQuantizationParams& input2_quantization_params, + const TfLiteQuantizationParams& output_quantization_params, + const int left_shift, const int32_t quantized_activation_min, + const int32_t quantized_activation_max, OpData* data) { + data->arithmetic_params.input1_offset = + -input1_quantization_params.zero_point; + data->arithmetic_params.input2_offset = + -input2_quantization_params.zero_point; + data->arithmetic_params.output_offset = output_quantization_params.zero_point; + data->arithmetic_params.left_shift = left_shift; + const double twice_max_input_scale = + 2.0 * static_cast(std::max(input1_quantization_params.scale, + input2_quantization_params.scale)); + const double real_input1_multiplier = + static_cast(input1_quantization_params.scale) / + twice_max_input_scale; + double real_input2_multiplier = + static_cast(input2_quantization_params.scale) / + twice_max_input_scale; + const double real_output_multiplier = + (twice_max_input_scale * twice_max_input_scale) / + static_cast((1 << data->arithmetic_params.left_shift * 2) * + output_quantization_params.scale); + 
QuantizeMultiplierSmallerThanOneExp( + real_input1_multiplier, &data->arithmetic_params.input1_multiplier, + &data->arithmetic_params.input1_shift); + QuantizeMultiplierSmallerThanOneExp( + real_input2_multiplier, &data->arithmetic_params.input2_multiplier, + &data->arithmetic_params.input2_shift); + QuantizeMultiplier(real_output_multiplier, + &data->arithmetic_params.output_multiplier, + &data->arithmetic_params.output_shift); + data->arithmetic_params.quantized_activation_min = quantized_activation_min; + data->arithmetic_params.quantized_activation_max = quantized_activation_max; +} + TfLiteStatus SquaredDifferencePrepare(TfLiteContext* context, TfLiteNode* node) { TFLITE_DCHECK(node->user_data != nullptr); @@ -68,11 +106,10 @@ TfLiteStatus SquaredDifferencePrepare(TfLiteContext* context, TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type); output->type = input2->type; - // Ensure the quantization parameters are equivalent. + const TfLiteQuantizationParams& input1_quantization_params = input1->params; + const TfLiteQuantizationParams& input2_quantization_params = input2->params; + const TfLiteQuantizationParams& output_quantization_params = output->params; if (input1->type == kTfLiteInt8) { - const auto& input1_quantization_params = input1->params; - const auto& input2_quantization_params = input2->params; - const auto& output_quantization_params = output->params; const int32_t integer_type_min = std::numeric_limits::min(); const int32_t integer_type_max = std::numeric_limits::max(); TF_LITE_ENSURE(context, @@ -87,43 +124,25 @@ TfLiteStatus SquaredDifferencePrepare(TfLiteContext* context, output_quantization_params.zero_point >= integer_type_min); TF_LITE_ENSURE(context, output_quantization_params.zero_point <= integer_type_max); - data->arithmetic_params.input1_offset = - -input1_quantization_params.zero_point; - data->arithmetic_params.input2_offset = - -input2_quantization_params.zero_point; - data->arithmetic_params.output_offset = - output_quantization_params.zero_point; - - // shift to make integer for scales. 
- // 7 is selected so that maximum shifted result 255^2 * (1 << (7 * 2 )) - // does not overflow signed 32-bit integer - data->arithmetic_params.left_shift = 7; - const double twice_max_input_scale = - 2.0 * static_cast<double>(std::max(input1_quantization_params.scale, - input2_quantization_params.scale)); - const double real_input1_multiplier = - static_cast<double>(input1_quantization_params.scale) / - twice_max_input_scale; - double real_input2_multiplier = - static_cast<double>(input2_quantization_params.scale) / - twice_max_input_scale; - const double real_output_multiplier = - (twice_max_input_scale * twice_max_input_scale) / - static_cast<double>((1 << data->arithmetic_params.left_shift * 2) * - output_quantization_params.scale); - QuantizeMultiplierSmallerThanOneExp( - real_input1_multiplier, &data->arithmetic_params.input1_multiplier, - &data->arithmetic_params.input1_shift); - QuantizeMultiplierSmallerThanOneExp( - real_input2_multiplier, &data->arithmetic_params.input2_multiplier, - &data->arithmetic_params.input2_shift); - QuantizeMultiplierSmallerThanOneExp( - real_output_multiplier, &data->arithmetic_params.output_multiplier, - &data->arithmetic_params.output_shift); - data->arithmetic_params.quantized_activation_min = - std::numeric_limits<int8_t>::min(); - data->arithmetic_params.quantized_activation_max = - std::numeric_limits<int8_t>::max(); + // left_shift = 7 is selected so that the maximum shifted result 255^2 * (1 << (7 + // * 2)) does not overflow a signed 32-bit integer + PrepareQuantized(input1_quantization_params, input2_quantization_params, + output_quantization_params, /*left_shift=*/7, + /*quantized_activation_min*/ integer_type_min, + /*quantized_activation_max*/ integer_type_max, data); + } else if (input1->type == kTfLiteInt16) { + const int32_t integer_type_min = std::numeric_limits<int16_t>::min(); + const int32_t integer_type_max = std::numeric_limits<int16_t>::max(); + TF_LITE_ENSURE(context, input1_quantization_params.zero_point == 0); + TF_LITE_ENSURE(context, input2_quantization_params.zero_point == 0); + TF_LITE_ENSURE(context, output_quantization_params.zero_point == 0); + + // left_shift = 0 because the values are already 16-bit: the maximum shifted result + // 32767^2 * (1 << (0 * 2)) still fits in a signed 32-bit integer + PrepareQuantized(input1_quantization_params, input2_quantization_params, + output_quantization_params, /*left_shift=*/0, + /*quantized_activation_min*/ integer_type_min, + /*quantized_activation_max*/ integer_type_max, data); } data->requires_broadcast = !HaveSameShapes(input1, input2); @@ -134,8 +153,8 @@ TfLiteStatus SquaredDifferencePrepare(TfLiteContext* context, return kTfLiteOk; } -inline int8_t SquaredDifference(int8_t x, int8_t y, - const ArithmeticParams& params) { +template <typename T> +T SquaredDifference(T x, T y, const ArithmeticParams& params) { const int32_t input1_val = params.input1_offset + x; const int32_t input2_val = params.input2_offset + y; const int32_t shifted_input1_val = input1_val * (1 << params.left_shift); @@ -148,16 +167,16 @@ inline int8_t SquaredDifference(int8_t x, int8_t y, shifted_input2_val, params.input2_multiplier, params.input2_shift); const int32_t raw_diff = scaled_input1_val - scaled_input2_val; - // Max of this is 255^2 * (1 << 14), so won't overflow 32 bits. + // Max of this is 32767^2 * (1 << 0), so won't overflow 32 bits. 
const int32_t squared_raw_diff = raw_diff * raw_diff; const int32_t raw_output = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - squared_raw_diff, params.output_multiplier, params.output_shift) + + MultiplyByQuantizedMultiplier(squared_raw_diff, params.output_multiplier, + params.output_shift) + params.output_offset; const int32_t clamped_output = std::min(params.quantized_activation_max, std::max(params.quantized_activation_min, raw_output)); - return static_cast(clamped_output); + return static_cast(clamped_output); } template @@ -180,9 +199,9 @@ void EvalQuantizedSquaredDifference(TfLiteContext* context, TfLiteNode* node, const int flat_size = tflite::micro::GetTensorShape(input1).FlatSize(); reference_integer_ops::ElementWise( flat_size, op_data->arithmetic_params, - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorData(output), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorData(output), reference_integer_ops::CheckArithmeticParams, SquaredDifference); } } @@ -228,9 +247,13 @@ TfLiteStatus SquaredDifferenceEval(TfLiteContext* context, TfLiteNode* node) { } else if (output->type == kTfLiteInt8) { EvalQuantizedSquaredDifference(context, node, data, input1, input2, output); + } else if (output->type == kTfLiteInt16) { + EvalQuantizedSquaredDifference(context, node, data, input1, input2, + output); } else { MicroPrintf( - "SquaredDifference only supports FLOAT32, INT32 and INT8 now, got %d.", + "SquaredDifference only supports FLOAT32, INT32 , INT16 and INT8 now, " + "got %d.", output->type); return kTfLiteError; } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/strided_slice.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/strided_slice.cc index 9985cf913..fede95480 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/strided_slice.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/strided_slice.cc @@ -1,4 +1,4 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -26,9 +26,8 @@ limitations under the License. 
#include "tensorflow/lite/micro/micro_log.h" namespace tflite { -namespace ops { -namespace micro { -namespace strided_slice { + +namespace { constexpr int kInputTensor = 0; constexpr int kBeginTensor = 1; @@ -198,13 +197,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } return kTfLiteOk; } -} // namespace strided_slice + +} // namespace TfLiteRegistration Register_STRIDED_SLICE() { - return tflite::micro::RegisterOp(strided_slice::Init, strided_slice::Prepare, - strided_slice::Eval); + return tflite::micro::RegisterOp(Init, Prepare, Eval); } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/sub_common.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/sub_common.cc index 7ad3aa6aa..d6647462f 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/sub_common.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/sub_common.cc @@ -52,14 +52,12 @@ TfLiteStatus CalculateOpDataSub(TfLiteContext* context, TfLiteSubParams* params, const float twice_max_input_scale = 2 * std::max(input1->params.scale, input2->params.scale); const double real_input1_multiplier = - static_cast(input1->params.scale) / - static_cast(twice_max_input_scale); + static_cast(input1->params.scale / twice_max_input_scale); const double real_input2_multiplier = - static_cast(input2->params.scale) / - static_cast(twice_max_input_scale); + static_cast(input2->params.scale / twice_max_input_scale); const double real_output_multiplier = - static_cast(twice_max_input_scale) / - ((1 << data->left_shift) * static_cast(output->params.scale)); + static_cast(twice_max_input_scale / + ((1 << data->left_shift) * output->params.scale)); QuantizeMultiplierSmallerThanOneExp( real_input1_multiplier, &data->input1_multiplier, &data->input1_shift); diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/svdf_common.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/svdf_common.cc index ed74358bc..fb92b4fd7 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/svdf_common.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/svdf_common.cc @@ -451,21 +451,19 @@ TfLiteStatus PrepareSvdf(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8); - const double effective_scale_1 = - static_cast(input->params.scale) * - static_cast(weights_feature->params.scale) / - static_cast(activation_state->params.scale); + const double effective_scale_1 = static_cast( + input->params.scale * weights_feature->params.scale / + activation_state->params.scale); const double effective_scale_2 = - static_cast(activation_state->params.scale) * - static_cast(weights_time->params.scale) / - static_cast(output->params.scale); + static_cast(activation_state->params.scale * + weights_time->params.scale / output->params.scale); // TODO(b/162018098): Use TF_LITE_ENSURE_NEAR when it is ready. 
TF_LITE_ENSURE( context, std::abs(static_cast(bias->params.scale) - - (static_cast(activation_state->params.scale) * - static_cast(weights_time->params.scale))) < 1e-5); + static_cast(activation_state->params.scale * + weights_time->params.scale)) < 1e-5); QuantizeMultiplier(effective_scale_1, &(data->effective_scale_1_a), &(data->effective_scale_1_b)); diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/tanh.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/tanh.cc index e10399307..33ea8d2be 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/tanh.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/tanh.cc @@ -1,4 +1,4 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -28,10 +28,9 @@ limitations under the License. #include "tensorflow/lite/micro/micro_utils.h" namespace tflite { -namespace ops { -namespace micro { -namespace activations { + namespace { + constexpr int kInputTensor = 0; constexpr int kOutputTensor = 0; @@ -148,8 +147,6 @@ TfLiteStatus TanhPrepare(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -} // namespace - TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) { const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, kInputTensor); @@ -193,12 +190,10 @@ TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) { } } -} // namespace activations +} // namespace TfLiteRegistration Register_TANH() { - return tflite::micro::RegisterOp( - activations::TanhInit, activations::TanhPrepare, activations::TanhEval); + return tflite::micro::RegisterOp(TanhInit, TanhPrepare, TanhEval); } -} // namespace micro -} // namespace ops + } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/BUILD b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/BUILD index e7187ef0d..12ddd6934 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/BUILD +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/BUILD @@ -1,3 +1,5 @@ +load("@tflm_pip_deps//:requirements.bzl", "requirement") + package( default_visibility = ["//tensorflow/lite/micro/kernels:__pkg__"], # Disabling layering_check because of http://b/177257332 @@ -15,3 +17,48 @@ cc_library( hdrs = ["conv_test_data.h"], deps = ["//tensorflow/lite/c:common"], ) + +cc_library( + name = "lstm_test_data", + srcs = ["lstm_test_data.cc"], + hdrs = [ + "lstm_test_data.h", + ], + deps = [ + "//tensorflow/lite/c:common", + "//tensorflow/lite/micro:test_helpers", + "//tensorflow/lite/micro/kernels:lstm_shared", + ], +) + +#################################### +# Python +#################################### +py_binary( + name = "lstm_test_data_generator", + srcs = [ + "lstm_test_data_generator.py", + "lstm_test_data_utils.py", + ], + srcs_version = "PY3", + deps = [ + "@absl_py//absl:app", + requirement("numpy"), + requirement("tensorflow-cpu"), + ], +) + +py_test( + name = "lstm_test_data_generator_test", + srcs = ["lstm_test_data_generator_test.py"], + main = "lstm_test_data_generator_test.py", + python_version = "PY3", + tags = [ + "noasan", + "nomsan", # Python doesn't like these symbols from interpreter_wrapper_pybind.so + "noubsan", + ], + deps = [ + ":lstm_test_data_generator", + ], +) diff --git 
a/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.cc new file mode 100644 index 000000000..4d7d9d9ed --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.cc @@ -0,0 +1,309 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/micro/kernels/testdata/lstm_test_data.h" + +#include <cstring> + +namespace tflite { +namespace testing { + +namespace { +// LSTM internal setting (e.g., nonlinear activation type) +// Only UnidirectionalLSTM is supported now +constexpr TfLiteUnidirectionalSequenceLSTMParams kDefaultBuiltinData = { + /*.activation=*/kTfLiteActTanh, + /*.cell_clip=*/6, + /*.proj_clip=*/3, + /*.time_major=*/false, + /*.asymmetric_quantize_inputs=*/true, + /*diagonal_recurrent_tensors=*/false}; +} // namespace + +GateOutputCheckData<4, 4> Get2X2GateOutputCheckData() { + GateOutputCheckData<4, 4> gate_data; + const float input_data[4] = { + 0.2, 0.3, // batch1 + -0.98, 0.62 // batch2 + }; + std::memcpy(gate_data.input_data, input_data, 4 * sizeof(float)); + + const float hidden_state[4] = { + -0.1, 0.2, // batch1 + -0.3, 0.5 // batch2 + }; + std::memcpy(gate_data.hidden_state, hidden_state, 4 * sizeof(float)); + + const float cell_state[4] = { + -1.3, 6.2, // batch1 + -7.3, 3.5 // batch2 + }; + std::memcpy(gate_data.cell_state, cell_state, 4 * sizeof(float)); + + // Use the forget gate parameters to test small gate outputs + // output = sigmoid(W_i*i+W_h*h+b) = sigmoid([[-10,-10],[-20,-20]][0.2, + // +[[-10,-10],[-20,-20]][-0.1, 0.2]+[1,2]) = sigmoid([-5,-10]) = + // [6.69285092e-03, 4.53978687e-05] (Batch1) + // Similarly, we have [0.93086158 0.9945137 ] for batch 2 + const float expected_forget_gate_output[4] = {6.69285092e-3f, 4.53978687e-5f, + 0.93086158, 0.9945137}; + std::memcpy(gate_data.expected_forget_gate_output, + expected_forget_gate_output, 4 * sizeof(float)); + + // Use the input gate parameters to test small gate outputs + // output = sigmoid(W_i*i+W_h*h+b) = sigmoid([[10,10],[20,20]][0.2, 0.3] + // +[[10,10],[20,20]][-0.1, 0.2]+[-1,-2]) = sigmoid([5,10]) = + // [0.99330715, 0.9999546] + // Similarly, we have [0.06913842 0.0054863 ] for batch 2 + const float expected_input_gate_output[4] = {0.99330715, 0.9999546, + 0.06913842, 0.0054863}; + std::memcpy(gate_data.expected_input_gate_output, expected_input_gate_output, + 4 * sizeof(float)); + + // Use the output gate parameters to test normal gate outputs + // output = sigmoid(W_i*i+W_h*h+b) = sigmoid([[1,1],[1,1]][0.2, 0.3] + // +[[1,1],[1,1]][-0.1, 0.2]+[0,0]) = sigmoid([0.6,0.6]) = + // [0.6456563062257954, 0.6456563062257954] + // Similarly, we have [[0.46008512 0.46008512]] for batch 2 + const float expected_output_gate_output[4] = { + 0.6456563062257954, 0.6456563062257954, 0.46008512, 0.46008512}; + 
std::memcpy(gate_data.expected_output_gate_output, + expected_output_gate_output, 4 * sizeof(float)); + + // Use the cell(modulation) gate parameters to tanh output + // output = tanh(W_i*i+W_h*h+b) = tanh([[1,1],[1,1]][0.2, 0.3] + // +[[1,1],[1,1]][-0.1, 0.2]+[0,0]) = tanh([0.6,0.6]) = + // [0.6456563062257954, 0.6456563062257954] + // Similarly, we have [-0.1586485 -0.1586485] for batch 2 + const float expected_cell_gate_output[4] = { + 0.5370495669980353, 0.5370495669980353, -0.1586485, -0.1586485}; + std::memcpy(gate_data.expected_cell_gate_output, expected_cell_gate_output, + 4 * sizeof(float)); + + // Cell = forget_gate*cell + input_gate*cell_gate + // Note -6.80625824 is clipped to -6 + const float expected_updated_cell[4] = {0.52475447, 0.53730665, -6, + 3.47992756}; + std::memcpy(gate_data.expected_updated_cell, expected_updated_cell, + 4 * sizeof(float)); + + // Use the updated cell state to update the hidden state + // tanh(expected_updated_cell) * expected_output_gate_output + const float expected_updated_hidden[4] = {0.31079388, 0.3169827, -0.46007947, + 0.45921249}; + std::memcpy(gate_data.expected_updated_hidden, expected_updated_hidden, + 4 * sizeof(float)); + return gate_data; +} + +// TODO(b/253466487): document how the golden values are arrived at +LstmEvalCheckData<12, 4, 12> Get2X2LstmEvalCheckData() { + LstmEvalCheckData<12, 4, 12> eval_data; + const float input_data[12] = { + 0.2, 0.3, 0.2, 0.3, 0.2, 0.3, // batch one + -0.98, 0.62, 0.01, 0.99, 0.49, -0.32 // batch two + }; + std::memcpy(eval_data.input_data, input_data, 12 * sizeof(float)); + + // Initialize hidden state as zeros + const float hidden_state[4] = {}; + std::memcpy(eval_data.hidden_state, hidden_state, 4 * sizeof(float)); + + // The expected model output after 3 time steps using the fixed input and + // parameters + const float expected_output[12] = { + 0.26455893, 0.26870455, 0.47935803, + 0.47937014, 0.58013272, 0.58013278, // batch1 + -1.41184672e-3f, -1.43329117e-5f, 0.46887168, + 0.46891281, 0.50054074, 0.50054148 // batch2 + }; + std::memcpy(eval_data.expected_output, expected_output, 12 * sizeof(float)); + + const float expected_hidden_state[4] = { + 0.58013272, 0.58013278, // batch1 + 0.50054074, 0.50054148 // batch2 + }; + std::memcpy(eval_data.expected_hidden_state, expected_hidden_state, + 4 * sizeof(float)); + + const float expected_cell_state[4] = { + 0.89740515, 0.8974053, // batch1 + 0.80327607, 0.80327785 // batch2 + }; + std::memcpy(eval_data.expected_cell_state, expected_cell_state, + 4 * sizeof(float)); + return eval_data; +} + +LstmNodeContent +Create2x3x2X2FloatNodeContents(const float* input_data, + const float* hidden_state_data, + const float* cell_state_data) { + // Parameters for different gates + // negative large weights for forget gate to make it really forget + const GateData forget_gate_data = { + /*.activation_weight=*/{-10, -10, -20, -20}, + /*.recurrent_weight=*/{-10, -10, -20, -20}, + /*.fused_bias=*/{1, 2}, + /*activation_zp_folded_bias=*/{0, 0}, + /*recurrent_zp_folded_bias=*/{0, 0}}; + // positive large weights for input gate to make it really remember + const GateData input_gate_data = { + /*.activation_weight=*/{10, 10, 20, 20}, + /*.recurrent_weight=*/{10, 10, 20, 20}, + /*.fused_bias=*/{-1, -2}, + /*activation_zp_folded_bias=*/{0, 0}, + /*recurrent_zp_folded_bias=*/{0, 0}}; + // all ones to test the behavior of tanh at normal range (-1,1) + const GateData cell_gate_data = { + /*.activation_weight=*/{1, 1, 1, 1}, + /*.recurrent_weight=*/{1, 1, 1, 1}, + 
/*.fused_bias=*/{0, 0}, + /*activation_zp_folded_bias=*/{0, 0}, + /*recurrent_zp_folded_bias=*/{0, 0}}; + // all ones to test the behavior of sigmoid at normal range (-1. 1) + const GateData output_gate_data = { + /*.activation_weight=*/{1, 1, 1, 1}, + /*.recurrent_weight=*/{1, 1, 1, 1}, + /*.fused_bias=*/{0, 0}, + /*activation_zp_folded_bias=*/{0, 0}, + /*recurrent_zp_folded_bias=*/{0, 0}}; + + LstmNodeContent float_node_contents( + kDefaultBuiltinData, forget_gate_data, input_gate_data, cell_gate_data, + output_gate_data); + + if (input_data != nullptr) { + float_node_contents.SetInputData(input_data); + } + if (hidden_state_data != nullptr) { + float_node_contents.SetHiddenStateData(hidden_state_data); + } + if (cell_state_data != nullptr) { + float_node_contents.SetCellStateData(cell_state_data); + } + return float_node_contents; +} + +NodeQuantizationParameters Get2X2Int8LstmQuantizationSettings() { + NodeQuantizationParameters quantization_settings; + quantization_settings.activation_type = kTfLiteInt8; + quantization_settings.weight_type = kTfLiteInt8; + quantization_settings.cell_type = kTfLiteInt16; + quantization_settings.bias_type = kTfLiteInt32; + quantization_settings.nonlinear_activation_input_scale = + 0.00024414062; // std::pow(2.0f, -12.0f) + quantization_settings.nonlinear_activation_output_scale = + 0.00003051757; // std::pow(2.0f, -15.0f) + + // state quantization parameters + quantization_settings.input = {/*scale=*/0.00784313725490196, /*zp=*/0, + /*symmetry=*/false}; + quantization_settings.output = {/*scale=*/0.004705882165580988, /*zp=*/-21, + /*symmetry=*/false}; + quantization_settings.hidden_state = {/*scale=*/0.004705882165580988, + /*zp=*/-21, /*symmetry=*/false}; + quantization_settings.cell_state = {/*scale=*/0.00024414062, /*zp=*/0, + /*symmetry=*/true}; + + // gate quantization parameters + quantization_settings.forget_gate = { + {/*scale=*/0.15748031496062992, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.15748031496062992, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.0012351397251814111, /*zp=*/0, /*symmetry=*/true}}; + quantization_settings.input_gate = { + {/*scale=*/0.15748031496062992, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.15748031496062992, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.0012351397251814111, /*zp=*/0, /*symmetry=*/true}}; + quantization_settings.cell_gate = { + {/*scale=*/0.007874015748031496, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.007874015748031496, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/6.175698625907056e-5, /*zp=*/0, /*symmetry=*/true}}; + quantization_settings.output_gate = { + {/*scale=*/0.007874015748031496, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.007874015748031496, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/6.175698625907056e-5, /*zp=*/0, /*symmetry=*/true}}; + return quantization_settings; +} + +NodeQuantizationParameters Get2X2Int16LstmQuantizationSettings() { + NodeQuantizationParameters quantization_settings; + quantization_settings.activation_type = kTfLiteInt16; + quantization_settings.weight_type = kTfLiteInt8; + quantization_settings.cell_type = kTfLiteInt16; + quantization_settings.bias_type = kTfLiteInt64; + quantization_settings.nonlinear_activation_input_scale = + 0.00024414062; // std::pow(2.0f, -12.0f) + quantization_settings.nonlinear_activation_output_scale = + 0.00003051757; // std::pow(2.0f, -15.0f) + + // state quantization parameters + quantization_settings.input = {/*scale=*/3.0518044e-5, /*zp=*/0, + /*symmetry=*/false}; + quantization_settings.output = {/*scale=*/1.8310826e-5, 
/*zp=*/-5461, + /*symmetry=*/false}; + quantization_settings.hidden_state = {/*scale=*/1.8310826e-5, /*zp=*/-5461, + /*symmetry=*/false}; + quantization_settings.cell_state = {/*scale=*/0.00024414062, /*zp=*/0, + /*symmetry=*/true}; + + // gate quantization parameters + quantization_settings.forget_gate = { + {/*scale=*/0.15748031496062992, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.15748031496062992, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/4.8059911474468205e-06, /*zp=*/0, /*symmetry=*/true}}; + quantization_settings.input_gate = { + {/*scale=*/0.15748031496062992, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.15748031496062992, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/4.8059911474468205e-06, /*zp=*/0, /*symmetry=*/true}}; + quantization_settings.cell_gate = { + {/*scale=*/0.007874015748031496, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.007874015748031496, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/2.40299557372341e-07, /*zp=*/0, /*symmetry=*/true}}; + quantization_settings.output_gate = { + {/*scale=*/0.007874015748031496, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.007874015748031496, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/2.40299557372341e-07, /*zp=*/0, /*symmetry=*/true}}; + return quantization_settings; +} + +LstmNodeContent +Create2x3x2X2Int8NodeContents(const float* input_data, + const float* hidden_state, + const float* cell_state) { + auto float_node_content = + Create2x3x2X2FloatNodeContents(input_data, hidden_state, cell_state); + const auto quantization_settings = Get2X2Int8LstmQuantizationSettings(); + return CreateIntegerNodeContents(quantization_settings, + /*fold_zero_point=*/true, + float_node_content); +} + +LstmNodeContent +Create2x3x2X2Int16NodeContents(const float* input_data, + const float* hidden_state, + const float* cell_state) { + auto float_node_content = + Create2x3x2X2FloatNodeContents(input_data, hidden_state, cell_state); + const auto quantization_settings = Get2X2Int16LstmQuantizationSettings(); + return CreateIntegerNodeContents(quantization_settings, + /*fold_zero_point=*/false, + float_node_content); +} + +} // namespace testing +} // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.h new file mode 100644 index 000000000..3edf4200a --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.h @@ -0,0 +1,579 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_TESTDATA_LSTM_TEST_DATA_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_TESTDATA_LSTM_TEST_DATA_H_ +#include + +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" +#include "tensorflow/lite/micro/kernels/lstm_shared.h" +#include "tensorflow/lite/micro/test_helpers.h" + +namespace tflite { +namespace testing { +// Data structure to store all the data used to check output of internal gates +// of one time step +// input_size = batch_size*input_dimension (size of the input array) +// gate_output_size = batch_size*state_dimension (size of the gate output) +template +struct GateOutputCheckData { + float input_data[input_size]; + float hidden_state[gate_output_size]; + float cell_state[gate_output_size]; + float expected_forget_gate_output[gate_output_size]; + float expected_input_gate_output[gate_output_size]; + float expected_output_gate_output[gate_output_size]; + float expected_cell_gate_output[gate_output_size]; + float expected_updated_cell[gate_output_size]; + float expected_updated_hidden[gate_output_size]; +}; + +// Data structure to store all the data used to check the output of the kernel +// of multiple batch, multiple timesteps +// input_size = batch_size*time_steps*input_dimension (size of the input array) +// gate_output_size = batch_size*state_dimension (size of the gate output) +// output_size = time_steps*gate_output_size (size of the output from the +// kernel) +template +struct LstmEvalCheckData { + float input_data[input_size]; + float hidden_state[gate_output_size]; + float expected_output[output_size]; + float expected_hidden_state[gate_output_size]; + float expected_cell_state[gate_output_size]; +}; + +// Struct that holds the weight/bias information for a standard gate (i.e. no +// modification such as layer normalization, peephole, etc.) +// Every gate is defined by the type and size of the weights (bias included) +// inside. +// Specifically, types are weight type and bias type (normally the same +// type of MatMul accumulator). +// activation_weight has shape (hidden state dimension * input tensor dimension) +// recurrent_weight has shape (hidden state dimension * hidden state dimension) +// bias has shape (hidden state dimension, 1) +template +struct GateData { + WeightType activation_weight[state_dimension * input_dimension]; + WeightType recurrent_weight[state_dimension * state_dimension]; + BiasType fused_bias[state_dimension]; + // Quantized model folded the zero point of activations into biases: + // bias + zero_point * weight. + // Note: folded bias is only required for the legacy 8x8->16 pass. 
Therefore + // the data type is fixed here to avoid compilation errors (the computation of + // folding does not support other types) + int32_t activation_zp_folded_bias[state_dimension]; + int32_t recurrent_zp_folded_bias[state_dimension]; +}; + +// A struct that holds quantization parameters for a LSTM Tensor +struct TensorQuantizationParameters { + double scale; + int zero_point; + bool symmetry; +}; + +// A struct that holds quantization parameters for an internal gate, which is +// defined by activation/recurrent weight and bias (assuming no internal layer +// normalization) +struct GateQuantizationParameters { + TensorQuantizationParameters activation_weight; + TensorQuantizationParameters recurrent_weight; + TensorQuantizationParameters bias; +}; + +// A struct that holds the quantization settings for the LSTM node. Data +// members can be grouped into five parts. +// 1. Data types (activation,weight, cell, bias) +// 2. Non-linear activation (i.e., tanh and sigmoid) fixed point +// calculation settings +// 3. Input/output tensor quantization settings +// 4. Internal state (hidden and cell) quantization settings +// 5. Internal gate (forget, input, cell, output) settings +struct NodeQuantizationParameters { + TfLiteType activation_type; + TfLiteType weight_type; + TfLiteType cell_type; + TfLiteType bias_type; + // Fixed point setting for integer nonlinear activation calculation + double nonlinear_activation_input_scale; + double nonlinear_activation_output_scale; + // Quantization parameters for input/output + TensorQuantizationParameters input; + TensorQuantizationParameters output; + // Quantization parameters for internal states + TensorQuantizationParameters hidden_state; + TensorQuantizationParameters cell_state; + // Quantization parameters for gates + GateQuantizationParameters forget_gate; + GateQuantizationParameters input_gate; + GateQuantizationParameters cell_gate; + GateQuantizationParameters output_gate; +}; + +// Data structure that holds all the information to evaluate a LSTM kernel +// (mimic the LSTM node). +// Tensor Types: +// ActivationType defines the data type of input/output of the layer. The hidden +// state has the ActivationType as well since it is the layer output of the +// previous time. +// WeightType defines the weight data type inside the internal gates. +// BiasType defines the bias data type inside the internal gates. (normally the +// same type of MatMul accumulator). +// Tensor Shapes: +// The input to the layer has shape (batch_size,time_steps,input_dimension). +// Both the hidden state and cell state has shape (state_dimension, 1) +// The output of the layer has shape (batch_size,time_steps,state_dimension) +// Note: state values can change through calls (stateful) +template +class LstmNodeContent { + public: + LstmNodeContent(const LstmNodeContent& other) = default; + LstmNodeContent& operator=(const LstmNodeContent& other) = default; + // Use the general model setting (builtin data) and the four gates data to + // construct the node content. 
Note the input, hidden state, and cell state + // data is provided later for flexible testing (initialize as zero now) + LstmNodeContent( + const TfLiteUnidirectionalSequenceLSTMParams builtin_data, + const GateData + forget_gate_params, + const GateData + input_gate_params, + const GateData + cell_gate_params, + const GateData + output_gate_params) + : builtin_data_(builtin_data), + forget_gate_data_(forget_gate_params), + input_gate_data_(input_gate_params), + cell_gate_data_(cell_gate_params), + output_gate_data_(output_gate_params) { + InitializeTensors(); + } + + // Add quantization parameters (scale, zero point) to tensors + // Only required for the integer kernel + void AddQuantizationParameters( + const NodeQuantizationParameters& quantization_params) { + quantization_settings_ = quantization_params; + // Input Tensor + SetTensorQuantizationParam(kLstmInputTensor, quantization_params.input); + // Forget Gate Tensors + const auto& forget_gate_quant_param = quantization_params.forget_gate; + SetTensorQuantizationParam(kLstmInputToForgetWeightsTensor, + forget_gate_quant_param.activation_weight); + SetTensorQuantizationParam(kLstmRecurrentToForgetWeightsTensor, + forget_gate_quant_param.recurrent_weight); + SetTensorQuantizationParam(kLstmForgetGateBiasTensor, + forget_gate_quant_param.bias); + // Input Gate Tensors + const auto& input_gate_quant_param = quantization_params.input_gate; + SetTensorQuantizationParam(kLstmInputToInputWeightsTensor, + input_gate_quant_param.activation_weight); + SetTensorQuantizationParam(kLstmRecurrentToInputWeightsTensor, + input_gate_quant_param.recurrent_weight); + SetTensorQuantizationParam(kLstmInputGateBiasTensor, + input_gate_quant_param.bias); + // Cell Gate Tensors + const auto& cell_gate_quant_param = quantization_params.cell_gate; + SetTensorQuantizationParam(kLstmInputToCellWeightsTensor, + cell_gate_quant_param.activation_weight); + SetTensorQuantizationParam(kLstmRecurrentToCellWeightsTensor, + cell_gate_quant_param.recurrent_weight); + SetTensorQuantizationParam(kLstmCellGateBiasTensor, + cell_gate_quant_param.bias); + // Output Gate Tensors + const auto& output_gate_quant_param = quantization_params.output_gate; + SetTensorQuantizationParam(kLstmInputToOutputWeightsTensor, + output_gate_quant_param.activation_weight); + SetTensorQuantizationParam(kLstmRecurrentToOutputWeightsTensor, + output_gate_quant_param.recurrent_weight); + SetTensorQuantizationParam(kLstmOutputGateBiasTensor, + output_gate_quant_param.bias); + // State Tensors + SetTensorQuantizationParam(kLstmOutputStateTensor, + quantization_params.hidden_state); + SetTensorQuantizationParam(kLstmCellStateTensor, + quantization_params.cell_state); + // Output Tensor + SetTensorQuantizationParam(24, quantization_params.output); + } + + // Provide interface to set the input tensor values for flexible testing + void SetInputData(const ActivationType* data) { + std::memcpy( + input_, data, + batch_size * input_dimension * time_steps * sizeof(ActivationType)); + SetTensor(kLstmInputTensor, input_, input_size_); + } + const ActivationType* GetInputData() const { return input_; } + + // Provide interface to set the hidden state tensor values for flexible + // testing + void SetHiddenStateData(const ActivationType* data) { + std::memcpy(hidden_state_, data, + batch_size * state_dimension * sizeof(ActivationType)); + } + ActivationType* GetHiddenStateData() { return hidden_state_; } + + // Provide interface to set the cell state tensor values for flexible + // testing + void 
SetCellStateData(const CellType* data) { + std::memcpy(cell_state_, data, + batch_size * state_dimension * sizeof(CellType)); + } + CellType* GetCellStateData() { return cell_state_; } + ActivationType* GetOutputData() { return output_; } + + // Internal tensors, see lstm_shared.h for tensor names + TfLiteEvalTensor* GetEvalTensor(const int tensor_index) { + auto valid_index = input_tensor_indices_[tensor_index + 1]; + if (valid_index < 0) { + return nullptr; + } + return &eval_tensors_[tensor_index]; + } + + TfLiteTensor* GetTensors() { return tensors_; } + + // Required by the kernel runner + TfLiteIntArray* KernelInputs() { + return IntArrayFromInts(input_tensor_indices_); + } + // Required by the kernel runner + TfLiteIntArray* KernelOutputs() { + return IntArrayFromInts(output_tensor_indices_); + } + + // Variable tensors (will be changed, can not be const) + TfLiteEvalTensor* HiddenStateEvalTensor() { + return &eval_tensors_[kLstmOutputStateTensor]; + } + TfLiteEvalTensor* CellStateEvalTensor() { + return &eval_tensors_[kLstmCellStateTensor]; + } + TfLiteEvalTensor* OutputEvalTensor() { return &eval_tensors_[24]; } + + const GateData& + ForgetGateData() const { + return forget_gate_data_; + } + const GateData& + InputGateData() const { + return input_gate_data_; + } + const GateData& + CellGateData() const { + return cell_gate_data_; + } + const GateData& + OutputGateData() const { + return output_gate_data_; + } + + const TfLiteUnidirectionalSequenceLSTMParams& BuiltinData() const { + return builtin_data_; + } + + const NodeQuantizationParameters& QuantizationSettings() const { + return quantization_settings_; + } + + private: + void InitializeTensors() { + // Invalid all the input tensors untill we set it + input_tensor_indices_[0] = 24; // tot elements + for (size_t i = 1; i < 25; i++) { + input_tensor_indices_[i] = kTfLiteOptionalTensor; + } + // Input Tensor + SetTensor(kLstmInputTensor, input_, input_size_); + // Forget Gate Tensors + SetTensor(kLstmInputToForgetWeightsTensor, + forget_gate_data_.activation_weight, activation_weight_size_); + SetTensor(kLstmRecurrentToForgetWeightsTensor, + forget_gate_data_.recurrent_weight, recurrent_weight_size_); + SetTensor(kLstmForgetGateBiasTensor, forget_gate_data_.fused_bias, + bias_size_); + // Input Gate Tensors + SetTensor(kLstmInputToInputWeightsTensor, + input_gate_data_.activation_weight, activation_weight_size_); + SetTensor(kLstmRecurrentToInputWeightsTensor, + input_gate_data_.recurrent_weight, recurrent_weight_size_); + SetTensor(kLstmInputGateBiasTensor, input_gate_data_.fused_bias, + bias_size_); + // Cell Gate Tensors + SetTensor(kLstmInputToCellWeightsTensor, cell_gate_data_.activation_weight, + activation_weight_size_); + SetTensor(kLstmRecurrentToCellWeightsTensor, + cell_gate_data_.recurrent_weight, recurrent_weight_size_); + SetTensor(kLstmCellGateBiasTensor, cell_gate_data_.fused_bias, bias_size_); + // Output Gate Tensors + SetTensor(kLstmInputToOutputWeightsTensor, + output_gate_data_.activation_weight, activation_weight_size_); + SetTensor(kLstmRecurrentToOutputWeightsTensor, + output_gate_data_.recurrent_weight, recurrent_weight_size_); + SetTensor(kLstmOutputGateBiasTensor, output_gate_data_.fused_bias, + bias_size_); + // State Tensors + SetTensor(kLstmOutputStateTensor, hidden_state_, state_size_, + /*is_variable=*/true); + SetTensor(kLstmCellStateTensor, cell_state_, state_size_, + /*is_variable=*/true); + // // Output Tensor + SetTensor(24, output_, output_size_, /*is_variable=*/true); + } + + template + 
void SetTensor(const int index, const T* data, int* dims, + const bool is_variable = false) { + // Lite tensors for kernel level testing + tensors_[index].data.data = const_cast(data); + tensors_[index].dims = IntArrayFromInts(dims); + tensors_[index].type = typeToTfLiteType(); + tensors_[index].is_variable = is_variable; + // Eval tensors for internal computation testing + eval_tensors_[index].data.data = const_cast(data); + eval_tensors_[index].dims = IntArrayFromInts(dims); + eval_tensors_[index].type = typeToTfLiteType(); + // update the index + if (index < 24) { + input_tensor_indices_[index + 1] = index; + } + } + + void SetTensorQuantizationParam( + const int index, const TensorQuantizationParameters& quant_param) { + tensors_[index].params.scale = quant_param.scale; + tensors_[index].params.zero_point = quant_param.zero_point; + } + + const TfLiteUnidirectionalSequenceLSTMParams builtin_data_; + GateData + forget_gate_data_; + GateData + input_gate_data_; + GateData + cell_gate_data_; + GateData + output_gate_data_; + + // Keep to ease the testing process (although all quantization information can + // be obtained from individual tensors, they are well organized here and light + // weighted) + NodeQuantizationParameters quantization_settings_; + + // Not const since IntArrayFromInts takes int *; the first element of the + // array must be the size of the array + int input_size_[4] = {3, batch_size, time_steps, input_dimension}; + int output_size_[4] = {3, batch_size, time_steps, state_dimension}; + // weight tensor has C-style "row-major" memory ordering + int activation_weight_size_[3] = {2, state_dimension, input_dimension}; + int recurrent_weight_size_[3] = {2, state_dimension, state_dimension}; + int bias_size_[2] = {1, state_dimension}; + int state_size_[3] = {2, batch_size, state_dimension}; + + // see lstm_shared.h for tensor names, the last tensor is the output tensor + TfLiteTensor tensors_[24 + 1]; + // Use for internel kernel testing + TfLiteEvalTensor eval_tensors_[24 + 1]; + // indices for the tensors inside the node (required by kernel runner) + int input_tensor_indices_[1 + 24] = {}; + // single output (last in the tensors array) + int output_tensor_indices_[2] = {1, 24}; + + // tennsor data + // states are initialized to zero + ActivationType hidden_state_[batch_size * state_dimension] = {}; + CellType cell_state_[batch_size * state_dimension] = {}; + // input is defined in the ModelContent (const across all derived models) + ActivationType input_[batch_size * input_dimension * time_steps] = {}; + ActivationType output_[batch_size * state_dimension * time_steps] = {}; +}; + +// Converts floating point gate parameters to the corresponding quantized +// version +template +GateData +CreateQuantizedGateData( + const GateData& + gate_parameters, + const TensorQuantizationParameters& input_quantization_params, + const TensorQuantizationParameters& output_quantization_params, + const GateQuantizationParameters& gate_quantization_params, + const bool fold_zero_point) { + GateData + quantized_gate_params; + tflite::SymmetricQuantize(gate_parameters.activation_weight, + quantized_gate_params.activation_weight, + state_dimension * input_dimension, + gate_quantization_params.activation_weight.scale); + tflite::SymmetricQuantize(gate_parameters.recurrent_weight, + quantized_gate_params.recurrent_weight, + state_dimension * state_dimension, + gate_quantization_params.recurrent_weight.scale); + tflite::SymmetricQuantize(gate_parameters.fused_bias, + 
quantized_gate_params.fused_bias, state_dimension, + gate_quantization_params.bias.scale); + // Note: steps below are not required for the generalized LSTM evaluation + // (e.g., 16bits activation) + if (fold_zero_point) { + // Copy the bias values to prepare zero_point folded + // bias precomputation. bias has same scale as + // input_scale*input_weight_scale) + std::memcpy(quantized_gate_params.activation_zp_folded_bias, + quantized_gate_params.fused_bias, 2 * sizeof(int32_t)); + // Pre-calculate bias - zero_point * weight (a constant). + tflite::tensor_utils::MatrixScalarMultiplyAccumulate( + quantized_gate_params.activation_weight, + -1 * input_quantization_params.zero_point, 2, 2, + quantized_gate_params.activation_zp_folded_bias); + + // Initialize the folded bias to zeros for accumulation + for (size_t i = 0; i < 2; i++) { + quantized_gate_params.recurrent_zp_folded_bias[i] = 0; + } + // Calculate : -zero_point * weight since it is a constant + tflite::tensor_utils::MatrixScalarMultiplyAccumulate( + quantized_gate_params.recurrent_weight, + -1 * output_quantization_params.zero_point, 2, 2, + quantized_gate_params.recurrent_zp_folded_bias); + } + return quantized_gate_params; +} + +// Create integer LSTM node content from the float node contents and +// quantization settings +// Note: fold_zero_point folds the zero point into the bias (precomputation), +// which is not required for the generalized integer inference (16 bits act +// LSTM). +template +LstmNodeContent +CreateIntegerNodeContents( + const NodeQuantizationParameters& quantization_settings, + const bool fold_zero_point, + LstmNodeContent& float_node_contents) { + const auto quantized_forget_gate_data = + CreateQuantizedGateData( + float_node_contents.ForgetGateData(), quantization_settings.input, + quantization_settings.output, quantization_settings.forget_gate, + fold_zero_point); + const auto quantized_input_gate_data = + CreateQuantizedGateData( + float_node_contents.InputGateData(), quantization_settings.input, + quantization_settings.output, quantization_settings.input_gate, + fold_zero_point); + const auto quantized_cell_gate_data = + CreateQuantizedGateData( + float_node_contents.CellGateData(), quantization_settings.input, + quantization_settings.output, quantization_settings.cell_gate, + fold_zero_point); + const auto quantized_output_gate_params = + CreateQuantizedGateData( + float_node_contents.OutputGateData(), quantization_settings.input, + quantization_settings.output, quantization_settings.output_gate, + fold_zero_point); + LstmNodeContent + quantized_node_content( + float_node_contents.BuiltinData(), quantized_forget_gate_data, + quantized_input_gate_data, quantized_cell_gate_data, + quantized_output_gate_params); + + // Quantize the floating point input + ActivationType quantized_input[batch_size * input_dimension * time_steps] = + {}; + Quantize(float_node_contents.GetInputData(), quantized_input, + batch_size * input_dimension * time_steps, + quantization_settings.input.scale, + quantization_settings.input.zero_point); + quantized_node_content.SetInputData(quantized_input); + // Quantize the floating point hidden state + ActivationType quantized_hidden_state[batch_size * state_dimension] = {}; + Quantize(float_node_contents.GetHiddenStateData(), quantized_hidden_state, + batch_size * state_dimension, + quantization_settings.hidden_state.scale, + quantization_settings.hidden_state.zero_point); + quantized_node_content.SetHiddenStateData(quantized_hidden_state); + // Quantize the floating point cell state + 
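As a quick reference for the symmetric quantization applied to the gate weights above (tflite::SymmetricQuantize with a per-tensor scale), here is a small NumPy sketch; the helper name, the max-abs/127 convention and the example weights are illustrative assumptions, not the exact TFLM routine.

import numpy as np

def symmetric_quantize(weights, num_bits=8):
  # Zero point is fixed at 0; the scale maps max|w| onto the largest positive
  # integer of the target type (assumed convention, not the exact TFLM code path).
  max_int = 2**(num_bits - 1) - 1
  scale = np.abs(weights).max() / max_int
  quantized = np.clip(np.round(weights / scale), -max_int - 1, max_int)
  return quantized.astype(np.int32), scale

weights = np.array([-10.0, -10.0, -20.0, -20.0])  # forget gate weights from the test data
quantized, scale = symmetric_quantize(weights)
print(quantized, scale)  # approximately [-64 -64 -127 -127], scale ~0.157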
CellType quantized_cell_state[batch_size * state_dimension] = {}; + Quantize(float_node_contents.GetCellStateData(), quantized_cell_state, + batch_size * state_dimension, quantization_settings.cell_state.scale, + quantization_settings.cell_state.zero_point); + quantized_node_content.SetCellStateData(quantized_cell_state); + + // Add scale and zero point to tensors + quantized_node_content.AddQuantizationParameters(quantization_settings); + return quantized_node_content; +} + +// Get the gate output data (one time step) for a simple 2X2 model +// batch_size = 2; time_steps = 1; input_dimension = 2; state_dimension = 2 +// input_size = batch_size*time_steps*input_dimension = 4 +// gate_output_size = batch_size*state_dimension = 4 +GateOutputCheckData<4, 4> Get2X2GateOutputCheckData(); + +// Get the kernel output data for a simple 2X2 model +// batch_size = 2; time_steps = 3; input_dimension = 2; state_dimension = 2 +// input_size = batch_size*time_steps*input_dimension = 12 +// gate_output_size = batch_size*state_dimension = 4 +// output_size = time_steps*gate_output_size = 12 +LstmEvalCheckData<12, 4, 12> Get2X2LstmEvalCheckData(); + +// Create a 2x2 float node content +// batch_size = 2; time_steps = 3; input_dimension = 2; state_dimension = 2 +LstmNodeContent +Create2x3x2X2FloatNodeContents(const float* input_data = nullptr, + const float* hidden_state = nullptr, + const float* cell_state = nullptr); + +// Get the quantization settings for the 2X2 model +NodeQuantizationParameters Get2X2Int8LstmQuantizationSettings(); + +// Create int8 (activation) x int8 (weight) -> int16 (cell) node +// batch_size = 2; time_steps = 3; input_dimension = 2; state_dimension = 2 +// input is in float format since the source of truth is always the float +// configuration +LstmNodeContent +Create2x3x2X2Int8NodeContents(const float* input_data = nullptr, + const float* hidden_state = nullptr, + const float* cell_state = nullptr); + +// Create int16 (activation) x int8 (weight) -> int16 (cell) node +// batch_size = 2; time_steps = 3; input_dimension = 2; state_dimension = 2 +// input is in float format since the source of truth is always the float +// configuration +LstmNodeContent +Create2x3x2X2Int16NodeContents(const float* input_data = nullptr, + const float* hidden_state = nullptr, + const float* cell_state = nullptr); + +} // namespace testing +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_TESTDATA_LSTM_TEST_DATA_H_ diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data_generator.py b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data_generator.py new file mode 100644 index 000000000..97c8798ef --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data_generator.py @@ -0,0 +1,192 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
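The input, hidden state and cell state above are quantized with an affine mapping q = round(r / scale) + zero_point followed by saturation. The sketch below restates that round trip in NumPy with assumed scale and zero-point values, purely for illustration.

import numpy as np

def quantize(data, scale, zero_point, num_bits=8):
  # q = round(r / scale) + zero_point, saturated to the signed integer range.
  qmin, qmax = -2**(num_bits - 1), 2**(num_bits - 1) - 1
  return np.clip(np.round(data / scale) + zero_point, qmin, qmax)

def dequantize(quantized, scale, zero_point):
  return scale * (quantized - zero_point)

data = np.array([0.2, 0.3])
scale, zero_point = 2.0 / 255, 0  # assumed parameters, roughly covering [-1, 1]
q = quantize(data, scale, zero_point)
print(q, dequantize(q, scale, zero_point))  # round-trip error stays within about scale / 2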
+# ============================================================================= +""" Generate the LSTM kernel test data settings in lstm_test_data.cc +1. Print the quantization settings for the test model (Get2X2Int8LstmQuantizationSettings in .cc) +2. Print the intermediate step outputs inside the LSTM for a single step LSTM invocation (Get2X2GateOutputCheckData in .cc) +3. Print the outputs for multi-step LSTM invocation (Get2X2LstmEvalCheckData in .cc) + +Every invocation gives three types of information: +1. Quantized output: kernel output in integer +2. Dequantized output: quantized output in floating point representation +3. Float output: output from the floating point computation (i.e., float kernel) + +Note: +1. Change the quantization settings in _KERNEL_CONFIG to see the outcomes from various quantization schemes (e.g., 8x8 vs. 16x8) +2. Only single batch inference is supported here. Change _GATE_TEST_DATA or _MULTISTEP_TEST_DATA to see kernel outputs on different input data +3. The quantization computation here is not exactly the same as the C++ implementation. The integer calculation is mimicked here using floating point. +No fixed point math is implemented here. The purpose is to illustrate the computation procedure and possible quantization error accumulation, not bit exactness. +""" +from absl import app +import numpy as np + +from tflite_micro.tensorflow.lite.micro.kernels.testdata import lstm_test_data_utils + +# Basic kernel information (default: a 2x2 model with int8 quantization) +# Change activation_bits to 16 for the 16x8 case +_KERNEL_CONFIG = { + 'quantization_settings': { + 'weight_bits': 8, + 'activation_bits': 8, + 'bias_bits': 32, + 'cell_bits': 16, + }, + 'shape_info': { + 'input_dim': 2, + 'state_dim': 2 + } +}
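For orientation, the scale and zero point implied by a float range and bit width (the quantities _KERNEL_CONFIG ultimately drives) can be derived as below. This is a sketch following the asymmetric convention of the calculate_scale / calculate_zp helpers that appear later in lstm_test_data_utils.py; the helper name here is illustrative.

def range_to_quant_params(min_val, max_val, num_bits=8):
  # Asymmetric convention: spread the float range over 2**num_bits - 1 bins and
  # anchor min_val near the integer floor (mirrors calculate_scale / calculate_zp
  # defined later in lstm_test_data_utils.py).
  scale = (max_val - min_val) / (2**num_bits - 1)
  zero_point = int(-2**(num_bits - 1) - min_val / scale)
  return scale, zero_point

print(range_to_quant_params(-1.0, 1.0, num_bits=8))   # int8 activations (8x8 case)
print(range_to_quant_params(-1.0, 1.0, num_bits=16))  # int16 activations (16x8 case)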
+# Kernel data setting (weight data for every gate). Corresponds to Create2x3x2X2FloatNodeContents in .cc +_KERNEL_PARAMETERS = { + 'forget_gate_data': { + 'activation_weight_data': [-10, -10, -20, -20], + 'recurrent_weight_data': [-10, -10, -20, -20], + 'bias_data': [1, 2], + }, + 'input_gate_data': { + 'activation_weight_data': [10, 10, 20, 20], + 'recurrent_weight_data': [10, 10, 20, 20], + 'bias_data': [-1, -2], + }, + 'cell_gate_data': { + 'activation_weight_data': [1, 1, 1, 1], + 'recurrent_weight_data': [1, 1, 1, 1], + 'bias_data': [0, 0], + }, + 'output_gate_data': { + 'activation_weight_data': [1, 1, 1, 1], + 'recurrent_weight_data': [1, 1, 1, 1], + 'bias_data': [0, 0], + }, +} + +# Input and states setting for gate level testing (Get2X2GateOutputCheckData in .cc) +# Only single batch inference is supported (default as batch1 in .cc) +_GATE_TEST_DATA = { + 'init_hidden_state_vals': [-0.1, 0.2], + 'init_cell_state_vals': [-1.3, 6.2], + 'input_data': [0.2, 0.3], + 'hidden_state_range': (-0.5, 0.7), + 'cell_state_range': [-8, 8], + 'input_data_range': [-1, 1] +} + +# Input and states setting for multi-step kernel testing (Get2X2LstmEvalCheckData in .cc) +# Only single batch inference is supported (default as batch1 in .cc) +_MULTISTEP_TEST_DATA = { + 'init_hidden_state_vals': [0, 0], + 'init_cell_state_vals': [0, 0], + 'input_data': [0.2, 0.3, 0.2, 0.3, 0.2, 0.3], # three time steps + 'hidden_state_range': (-0.5, 0.7), + 'cell_state_range': [-8, 8], + 'input_data_range': [-1, 1] +} + + +def print_tensor_quantization_params(tensor_name, tensor): + """Print the tensor quantization information (scale and zero point)""" + print(f"{tensor_name}, scale: {tensor.scale}, zero_point:" + f" {tensor.zero_point}") + + +def print_gate_tensor_params(gate_name, gate): + """Print the quantization information for a gate (input/forget/cell/output gate)""" + print(f"###### Quantization settings for {gate_name} ######") + print_tensor_quantization_params("activation weight", gate.activation_weight) + print_tensor_quantization_params("recurrent weight", gate.recurrent_weight) + + +def print_quantization_settings(lstm_debugger): + """Print the quantization information for an LSTM kernel""" + print_gate_tensor_params("forget gate", lstm_debugger.forget_gate_params) + print_gate_tensor_params("input gate", lstm_debugger.input_gate_params) + print_gate_tensor_params("cell gate", lstm_debugger.modulation_gate_params) + print_gate_tensor_params("output gate", lstm_debugger.output_gate_params) + print("###### State Tensors ######") + print_tensor_quantization_params("Hidden State Tensor", + lstm_debugger.hidden_state_tensor) + print_tensor_quantization_params("Cell State Tensor", + lstm_debugger.cell_state_tensor) + + +def print_one_step(lstm_debugger): + """Print the intermediate calculation results for a one step LSTM invocation (Get2X2GateOutputCheckData in .cc)""" + test_data = np.array(_GATE_TEST_DATA['input_data']).reshape((-1, 1)) + input_data_range = _GATE_TEST_DATA['input_data_range'] + input_tensor = lstm_test_data_utils.assemble_quantized_tensor( + test_data, + input_data_range[0], + input_data_range[1], + symmetry=False, + num_bits=_KERNEL_CONFIG['quantization_settings']['activation_bits']) + lstm_debugger.invoke(input_tensor, debug=True) + + +def print_multi_step(lstm_debugger, debug=False): + """Print the output of every step for a multi-step LSTM invocation (Get2X2LstmEvalCheckData in .cc)""" + input_data = _MULTISTEP_TEST_DATA['input_data'] + input_data_range = _MULTISTEP_TEST_DATA['input_data_range'] + input_data_size = 
_KERNEL_CONFIG['shape_info']['input_dim'] + input_start_pos = 0 + steps = 0 + while input_start_pos < len(input_data): + one_step_data = np.array(input_data[input_start_pos:input_start_pos + + input_data_size]).reshape((-1, 1)) + input_tensor = lstm_test_data_utils.assemble_quantized_tensor( + one_step_data, + input_data_range[0], + input_data_range[1], + symmetry=False, + num_bits=_KERNEL_CONFIG['quantization_settings']['activation_bits']) + output_quant, output_float = lstm_debugger.invoke(input_tensor, + debug=debug) + print(f"##### Step: {steps} #####") + print(f"Quantized Output: {output_quant.flatten()}") + print( + f"Dequantized Output: {lstm_debugger.hidden_state_tensor.dequantized_data.flatten()}" + ) + print(f"Float Output: {output_float.flatten()}") + input_start_pos += input_data_size + steps += 1 + + +def main(_): + one_step_lstm_debugger = lstm_test_data_utils.QuantizedLSTMDebugger( + _KERNEL_CONFIG, + _KERNEL_PARAMETERS, + _GATE_TEST_DATA['init_hidden_state_vals'], + _GATE_TEST_DATA['hidden_state_range'], + _GATE_TEST_DATA['init_cell_state_vals'], + _GATE_TEST_DATA['cell_state_range'], + ) + print("========== Quantization Settings for the Test Kernel ========== ") + print_quantization_settings(one_step_lstm_debugger) + print("========== Single Step Invocation Intermediates ========== ") + print_one_step(one_step_lstm_debugger) + + multi_step_lstm_debugger = lstm_test_data_utils.QuantizedLSTMDebugger( + _KERNEL_CONFIG, + _KERNEL_PARAMETERS, + _MULTISTEP_TEST_DATA['init_hidden_state_vals'], + _MULTISTEP_TEST_DATA['hidden_state_range'], + _MULTISTEP_TEST_DATA['init_cell_state_vals'], + _MULTISTEP_TEST_DATA['cell_state_range'], + ) + print("========== Multi Step Invocation Intermediates ========== ") + print_multi_step(multi_step_lstm_debugger) + + +if __name__ == "__main__": + app.run(main) diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data_generator_test.py b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data_generator_test.py new file mode 100644 index 000000000..cb5c21de4 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data_generator_test.py @@ -0,0 +1,108 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
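The "Float Output" printed by print_multi_step comes from the debugger's parallel floating point path, which follows the standard (non-CIFG, non-peephole) LSTM cell update. The sketch below restates that update in NumPy; the gates dictionary layout and helper names are illustrative assumptions, not the module's API.

import numpy as np

def sigmoid(x):
  return 1.0 / (1.0 + np.exp(-x))

def lstm_step_float(x, h, c, gates):
  # One float LSTM step (standard cell: no CIFG, peephole or projection). Each
  # gate entry holds an input weight 'W', a recurrent weight 'U' and a bias 'b'
  # with shapes (state_dim, input_dim), (state_dim, state_dim) and (state_dim,).
  def gate(name, activation):
    params = gates[name]
    return activation(params['W'] @ x + params['U'] @ h + params['b'])

  forget = gate('forget', sigmoid)
  update = gate('input', sigmoid)
  cell = gate('cell', np.tanh)  # the modulation gate uses tanh
  output = gate('output', sigmoid)
  c_new = forget * c + update * cell
  h_new = output * np.tanh(c_new)
  return h_new, c_new

Driving this update with the three time steps in _MULTISTEP_TEST_DATA should track the float outputs reported above, while the quantized path accumulates the rounding error the generator is meant to expose.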
+# ============================================================================= +import numpy as np +import tensorflow as tf + +from tensorflow.python.framework import test_util +from tensorflow.python.platform import test +from tflite_micro.tensorflow.lite.micro.kernels.testdata import lstm_test_data_utils + +_KERNEL_CONFIG = { + 'quantization_settings': { + 'weight_bits': 8, + 'activation_bits': 8, + 'bias_bits': 32, + 'cell_bits': 16, + }, + 'shape_info': { + 'input_dim': 2, + 'state_dim': 2 + } +} + +_KERNEL_PARAMETERS = { + 'forget_gate_data': { + 'activation_weight_data': [1, 1, 1, 1], + 'recurrent_weight_data': [1, 1, 1, 1], + 'bias_data': [0, 0], + }, + 'input_gate_data': { + 'activation_weight_data': [1, 1, 1, 1], + 'recurrent_weight_data': [1, 1, 1, 1], + 'bias_data': [0, 0], + }, + 'cell_gate_data': { + 'activation_weight_data': [1, 1, 1, 1], + 'recurrent_weight_data': [1, 1, 1, 1], + 'bias_data': [0, 0], + }, + 'output_gate_data': { + 'activation_weight_data': [1, 1, 1, 1], + 'recurrent_weight_data': [1, 1, 1, 1], + 'bias_data': [0, 0], + }, +} + +_KERNEL_INITIALIZATION_SETTINGS = { + 'init_hidden_state_vals': [0, 0], + 'init_cell_state_vals': [0, 0], + 'hidden_state_range': (-1, 1), + 'cell_state_range': [-8, 8], +} + + +def create_keras_lstm(stateful=True): + """Create a keras model with LSTM layer only for testing""" + input_layer = tf.keras.layers.Input(shape=(1, 2), batch_size=1, name="input") + lstm_output = tf.keras.layers.LSTM(units=2, + return_sequences=True, + stateful=stateful, + unit_forget_bias=False, + return_state=True, + kernel_initializer="ones", + recurrent_initializer="ones", + bias_initializer="zeros")(input_layer) + return tf.keras.Model(input_layer, lstm_output, name="LSTM") + + +class QuantizedLSTMDebuggerTest(test_util.TensorFlowTestCase): + + # only the float output from the debugger is used to setup the test data in .cc + def testFloatCompareWithKeras(self): + keras_lstm = create_keras_lstm() + lstm_debugger = lstm_test_data_utils.QuantizedLSTMDebugger( + _KERNEL_CONFIG, + _KERNEL_PARAMETERS, + _KERNEL_INITIALIZATION_SETTINGS['init_hidden_state_vals'], + _KERNEL_INITIALIZATION_SETTINGS['hidden_state_range'], + _KERNEL_INITIALIZATION_SETTINGS['init_cell_state_vals'], + _KERNEL_INITIALIZATION_SETTINGS['cell_state_range'], + ) + + num_steps = 20 + for _ in range(num_steps): + # debugger has input shape (input_dim, 1) + test_data = np.random.rand(2, 1) + input_tensor = lstm_test_data_utils.assemble_quantized_tensor( + test_data, -1, 1, False) + _, output_float = lstm_debugger.invoke(input_tensor) + output_keras, _, _ = keras_lstm.predict(test_data.reshape(1, 1, 2)) + + diff = abs(output_float.flatten() - output_keras.flatten()) + self.assertAllLess(diff, 1e-6) + + +if __name__ == "__main__": + test.main() \ No newline at end of file diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data_utils.py b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data_utils.py new file mode 100644 index 000000000..345b143fa --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data_utils.py @@ -0,0 +1,531 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= +"""Utilities for lstm_test_data_generator.py that help to generate the test data for the LSTM kernel (lstm_test_data.cc)""" + +import numpy as np +from copy import deepcopy + + +def clip_range(vals, bit_width): + """Mimic integer calculation. + Clip the range of vals based on bit width. + e.g., clip_range([300], 8) = [127] since int8 has range [-128, 127] + Args: + vals (np.array): float representation of the integer values + bit_width (int): number of desired bits for vals + Returns: + np.array : clipped vals + """ + # Numpy integer calculation does not saturate, so saturation is implemented here + min_val = -2**(bit_width - 1) + max_val = 2**(bit_width - 1) - 1 + if vals.max() > max_val or vals.min() < min_val: + print("WARNING: integer overflow!") + return np.clip(vals, min_val, max_val) + + +def quantize_data(data, scale, zero_point=0, bit_width=8): + """Quantize the data to integer type with desired bit width. + The quantized data is represented using float since integer calculation in + numpy may differ from other implementations (e.g., no integer saturation + protection in numpy) + Args: + data (np.array): float data + scale (float): quantization scale of the data + zero_point (integer): quantization zero point of the data + bit_width (int): number of bits used to represent vals + Returns: + np.array : quantized data in float but with a clipped range + """ + vals = np.round(data / scale) + zero_point + return clip_range(vals, bit_width) + + +def dequantize_data(quantized_data, scale, zero_point=0): + """Dequantize the data back to its floating point representation.
+ Args: + quantized_data (np.array): quantized data + scale (float): quantization scale of the data + zero_point (integer): quantization zero point of the data + Returns: + np.array : dequantized data + """ + return scale * (quantized_data - zero_point) + + +def rescale(data, effective_scale, zero_point, num_bits): + """Rescale the data to the effective scale """ + # q = r/s + z + rescaled = np.round(data * effective_scale) + zero_point + return clip_range(rescaled, num_bits) + + +def calculate_scale(min_val, max_val, num_bits=8, symmetry=False): + """Calculate quantization scale from the range and bit width""" + num_bins = np.power(2, num_bits) - 1 + if symmetry: + return max(abs(min_val), abs(max_val)) / int(num_bins / 2) + return np.array((max_val - min_val) / num_bins, dtype=np.float32) + + +def calculate_zp(min_val, scale, num_bits=8): + """Calculate the zero point from the minimal value""" + quantized_floor = -np.power(2, num_bits) / 2 + return int(quantized_floor - min_val / scale) + + +def sigmoid(x): + """Sigmoid (floating point)""" + return 1 / (1 + np.exp(-x)) + + +def quantized_sigmoid(input, input_scale, output_scale, num_bits=16): + """Sigmoid (interger)""" + float_input = input * input_scale + float_result = sigmoid(float_input) + return quantize_data(float_result, output_scale, bit_width=num_bits) + + +def quantized_tanh(input, input_scale, output_scale, num_bits=16): + """Tanh (interger)""" + float_input = input * input_scale + float_result = np.tanh(float_input) + return quantize_data(float_result, output_scale, bit_width=num_bits) + + +class QuantizedTensor: + """Data structure for a quantized tensor""" + + def __init__(self, float_data, scale, zero_point, symmetry, num_bits=8): + """Tensor is initialized using the floating point data""" + self.float_data = float_data + self.scale = scale + self.zero_point = int(zero_point) + self.symmetry = symmetry + self.num_bits = num_bits + self.quantized_data = quantize_data(float_data, scale, zero_point, + num_bits) + + @property + def dequantized_data(self): + """Dequantize the quantized tensor data back to floating point""" + return dequantize_data(self.quantized_data, self.scale, + self.zero_point).flatten() + + +class QuantizedGateParams: + """Hold the quantization data and corresponding information for a LSTM gate (forget/input/cell/output gate) """ + + def __init__( + self, + quantized_activation_weight, + quantized_recurrent_weight, + bias_data_float, + shape_info, + bias_num_bits=32, + cell_num_bits=16, + modulation=False, + ): + self.shape_info = shape_info + self.activation_weight = quantized_activation_weight + self.recurrent_weight = quantized_recurrent_weight + self.bias_data_float = bias_data_float + self.modulation = modulation + self.bias_num_bits = bias_num_bits + self.cell_num_bits = cell_num_bits + # For INT16 cell state, the input scale is Q3.12 + self.nonlinear_input_scale = np.power(2.0, -(cell_num_bits - 4)) + # For INT16 cell state, the output scale is Q0.15 + self.nonlinear_output_scale = np.power(2.0, -(cell_num_bits - 1)) + + def quantize_bias_data(self, input_scale): + bias_scale = self.activation_weight.scale * input_scale + return quantize_data(self.bias_data_float, bias_scale, 0, + self.bias_num_bits) + + def fold_zeropoint(self, weight, zero_point): + # W*real = W*(quant-zero_pt) = Wquant - Wzero_pt + # Wzero_pt is precomputed here as a constant (implemented in TFLM) + zp_vector = zero_point * np.ones(shape=(self.shape_info['input_dim'], 1)) + zero_folded_vector = np.dot(weight, zp_vector) + return 
-1 * clip_range(zero_folded_vector, self.bias_num_bits) + + def compute_activation_bias(self, input_scale, input_zp): + # Wz is precomputed here and added to the original bias (same scale) + zero_folded_vector = self.fold_zeropoint( + self.activation_weight.quantized_data, input_zp) + quantized_bias = self.quantize_bias_data(input_scale) + return zero_folded_vector + quantized_bias + + def compute_recurrent_bias(self, recurrent_zp): + # Wz is precomputed here + return self.fold_zeropoint(self.recurrent_weight.quantized_data, + recurrent_zp) + + def effective_activation_scale(self, input_scale): + # Combine input scale with output scale. Used for fc calculation + return (self.activation_weight.scale * input_scale / + self.nonlinear_input_scale) + + def effective_recurrence_scale(self, recurrent_scale): + # Combine recurrent scale with output scale. Used for fc calculation + return (self.recurrent_weight.scale * recurrent_scale / + self.nonlinear_input_scale) + + +def assemble_quantized_tensor(float_data, + min_val, + max_val, + symmetry, + num_bits=8): + """Create a QuantizedTensor using floating point data, range information, and bit width""" + scale = calculate_scale(min_val, max_val, num_bits, symmetry) + zp = 0 + if not symmetry: + zp = calculate_zp(min_val, scale, num_bits) + return QuantizedTensor(float_data, + scale, + zp, + symmetry=symmetry, + num_bits=num_bits) + + +def create_gate_params(gate_parameters, model_config, modulation=False): + """Create a QuantizedGateParams using the gate parameter information and the model configuration""" + shape_info = model_config['shape_info'] + quantization_settings = model_config['quantization_settings'] + + activation_weight_data = np.array( + gate_parameters['activation_weight_data']).reshape( + (shape_info['input_dim'], shape_info['state_dim'])) + activation_weight = assemble_quantized_tensor( + activation_weight_data, + activation_weight_data.min(), + activation_weight_data.max(), + True, + quantization_settings['weight_bits'], + ) + + recurrent_weight_data = np.array( + gate_parameters['recurrent_weight_data']).reshape( + (shape_info['input_dim'], shape_info['state_dim'])) + + recurrent_weight = assemble_quantized_tensor( + recurrent_weight_data, + recurrent_weight_data.min(), + recurrent_weight_data.max(), + True, + quantization_settings['weight_bits'], + ) + + bias_data_float = np.array(gate_parameters['bias_data']).reshape( + (shape_info['input_dim'], 1)) + gate_params = QuantizedGateParams( + activation_weight, + recurrent_weight, + bias_data_float, + shape_info, + bias_num_bits=quantization_settings['bias_bits'], + cell_num_bits=quantization_settings['cell_bits'], + modulation=modulation, + ) + return gate_params + + +def gate_calculation(input, hidden_state, gate_params, debug=False): + """ + A gate calculation is sigmoid(FC(activation, activation weight) + FC(recurrent, recurrent weight)). + For the modulation (cell) gate, tanh is used instead of sigmoid.
+ + Note: for debugging purpose, floating point calculation is conducted in parallel with the integer calculation + """ + # Quantized Version + input_fc = np.dot(gate_params.activation_weight.quantized_data, + input.quantized_data) + input_fc += gate_params.compute_activation_bias(input.scale, + input.zero_point) + input_fc = rescale(input_fc, + gate_params.effective_activation_scale(input.scale), 0, + gate_params.cell_num_bits) + recurrent_fc = np.dot(gate_params.recurrent_weight.quantized_data, + hidden_state.quantized_data) + recurrent_fc += gate_params.compute_recurrent_bias(hidden_state.zero_point) + recurrent_fc = rescale( + recurrent_fc, gate_params.effective_recurrence_scale(hidden_state.scale), + 0, gate_params.cell_num_bits) + + before_activation = clip_range(input_fc + recurrent_fc, + gate_params.cell_num_bits) + + # Float Version + float_result = np.dot(gate_params.activation_weight.float_data, + input.float_data) + float_result += np.dot(gate_params.recurrent_weight.float_data, + hidden_state.float_data) + float_result += gate_params.bias_data_float + + if debug: + print(f'input fc: {input_fc.flatten()}') + print(f'recurrent fc: {recurrent_fc.flatten()}') + + dequantized_res = dequantize_data(before_activation, + gate_params.nonlinear_input_scale) + print(f'Intermediate before activation: {before_activation.flatten()}') + print(f'dequantized :{dequantized_res.flatten()} ') + print(f'float computation result: {float_result.flatten()} ') + + diff = dequantized_res - float_result + print(f'diff percentage (%): {abs(diff/float_result).flatten()*100}') + + if gate_params.modulation: + activated = quantized_tanh(before_activation, + gate_params.nonlinear_input_scale, + gate_params.nonlinear_output_scale, + gate_params.cell_num_bits) + float_result = np.tanh(float_result) + else: + activated = quantized_sigmoid(before_activation, + gate_params.nonlinear_input_scale, + gate_params.nonlinear_output_scale, + gate_params.cell_num_bits) + float_result = sigmoid(float_result) + + if debug: + dequantized_res = dequantize_data(activated, + gate_params.nonlinear_output_scale) + print(f'Gate result: {activated.flatten()} ') + print(f'Dequantized: {dequantized_res.flatten()} ') + print(f'float computation result: {float_result.flatten()} ') + diff = dequantized_res - float_result + print(f'diff percentage (%): {abs(diff/float_result).flatten()*100}') + + return activated, float_result + + +# The LSTM class +class QuantizedLSTMDebugger(object): + """Help the debugging process of the LSTM kernel implementation by + 1. Exposing the kernel internal computation + 2. 
Run floating point calculation in parallel with the integer version + """ + + def __init__( + self, + kernel_config, + kernel_params, + init_hidden_state_vals, + hiddens_state_range, + init_cell_state_vals, + cell_state_range, + cell_clip=8, + ): + self.kernel_config = kernel_config + self.forget_gate_params = create_gate_params( + kernel_params['forget_gate_data'], kernel_config) + self.input_gate_params = create_gate_params( + kernel_params['input_gate_data'], kernel_config) + self.modulation_gate_params = create_gate_params( + kernel_params['cell_gate_data'], kernel_config, modulation=True) + self.output_gate_params = create_gate_params( + kernel_params['output_gate_data'], kernel_config) + self.quantization_settings = kernel_config['quantization_settings'] + + self.hidden_state_tensor = assemble_quantized_tensor( + np.array(init_hidden_state_vals).reshape((-1, 1)), + hiddens_state_range[0], + hiddens_state_range[1], + False, + self.quantization_settings['activation_bits'], + ) + self.cell_state_tensor = assemble_quantized_tensor( + np.array(init_cell_state_vals).reshape((-1, 1)), + cell_state_range[0], + cell_state_range[1], + True, + self.quantization_settings['cell_bits'], + ) + + self.quantized_cell_clip = quantize_data( + cell_clip, + self.cell_state_tensor.scale, + self.cell_state_tensor.zero_point, + self.quantization_settings['cell_bits'], + ) + + def invoke(self, input_tensor, debug=False): + assert ( + input_tensor.num_bits == self.quantization_settings['activation_bits']) + + prev_hidden_state_tensor = deepcopy(self.hidden_state_tensor) + prev_cell_state_tensor = deepcopy(self.cell_state_tensor) + + prev_hidden_state_float = prev_hidden_state_tensor.float_data + prev_cell_state_float = prev_cell_state_tensor.float_data + + # forget gate + forget_gate_quant, forget_gate_float = gate_calculation( + input_tensor, prev_hidden_state_tensor, self.forget_gate_params) + + self.cell_state_tensor.quantized_data = rescale( + prev_cell_state_tensor.quantized_data * forget_gate_quant, + self.forget_gate_params.nonlinear_output_scale, + 0, + self.quantization_settings['cell_bits'], + ) + self.cell_state_tensor.float_data = (prev_cell_state_float * + forget_gate_float) + + # input gate + input_gate_quant, input_gate_float = gate_calculation( + input_tensor, prev_hidden_state_tensor, self.input_gate_params) + + modulation_gate_quant, modulation_gate_float = gate_calculation( + input_tensor, prev_hidden_state_tensor, self.modulation_gate_params) + + gated_input_quant = rescale( + input_gate_quant * modulation_gate_quant, + self._calculate_effective_cell_scale(), + 0, + self.quantization_settings['cell_bits'], + ) + gated_input_float = input_gate_float * modulation_gate_float + + if ( + debug + ): # Hidden/cell state will be updated, break up the debug to record the intermediate state + print('======================One Step LSTM======================') + print('###### Forget Gate Output: ######') + print(f'Quantized: {forget_gate_quant.flatten()}') + dequantized_val = dequantize_data( + forget_gate_quant, self.forget_gate_params.nonlinear_output_scale, 0) + print(f'Dequantized : {dequantized_val.flatten()}') + print(f'Float : {forget_gate_float.flatten()}') + + print('###### Cell state after forgetting: ######') + print(f'Quantized: {self.cell_state_tensor.quantized_data.flatten()}') + print( + f'Dequantized: {self.cell_state_tensor.dequantized_data.flatten()}') + print(f'Float : {self.cell_state_tensor.float_data.flatten()}') + + print('###### Input gate output: ######') + print(f'Quantized: 
{input_gate_quant.flatten()}') + dequantized_val = dequantize_data( + input_gate_quant, self.input_gate_params.nonlinear_output_scale, 0) + print(f'Dequantized: {dequantized_val.flatten()}') + print(f'Float : {input_gate_float.flatten()}') + + print('###### cell gate output: ######') + print(f'Quantized: {modulation_gate_quant.flatten()}') + dequantized_val = dequantize_data( + modulation_gate_quant, + self.modulation_gate_params.nonlinear_output_scale, + 0, + ) + print(f'Dequantized: {dequantized_val.flatten()}') + print(f'Float : {modulation_gate_float.flatten()}') + + print('###### Gated input (input_gate * cell_gate): ######') + print(f'Quantized: {gated_input_quant.flatten()}') + dequantized_val = dequantize_data(gated_input_quant, + self.cell_state_tensor.scale, 0) + print(f'Dequantized: {dequantized_val.flatten()}') + print(f'Float : {gated_input_float.flatten()}') + + # Update the cell state + self.cell_state_tensor.quantized_data += gated_input_quant + self._apply_cell_clip() + self.cell_state_tensor.float_data += gated_input_float + + # output gate + output_gate_quant, output_gate_float = gate_calculation( + input_tensor, prev_hidden_state_tensor, self.output_gate_params) + + # Update the hidden state + transformed_cell_quant = quantized_tanh( + self.cell_state_tensor.quantized_data, + self.output_gate_params.nonlinear_input_scale, + self.output_gate_params.nonlinear_output_scale, + self.cell_state_tensor.num_bits, + ) + + transformed_cell_float = np.tanh(self.cell_state_tensor.float_data) + + gated_output_quant = rescale( + output_gate_quant * transformed_cell_quant, + self._calculate_effective_output_scale(), + self.hidden_state_tensor.zero_point, + self.hidden_state_tensor.num_bits, + ) + gated_output_float = output_gate_float * transformed_cell_float + + self.hidden_state_tensor.quantized_data = gated_output_quant + self.hidden_state_tensor.float_data = gated_output_float + + if debug: + print('###### Updated cell state): ######') + print(f'Quantized: {self.cell_state_tensor.quantized_data.flatten()}') + print( + f'Dequantized: {self.cell_state_tensor.dequantized_data.flatten()}') + print(f'Float : {self.cell_state_tensor.float_data.flatten()}') + + print('###### Output gate: ######') + print(f'Quantized : {output_gate_quant.flatten()}') + dequantized_val = dequantize_data( + output_gate_quant, self.output_gate_params.nonlinear_output_scale, 0) + print(f'Dequantized: {dequantized_val.flatten()}') + print(f'Float : {output_gate_float.flatten()}') + + print('###### Tanh transformed cell: ######') + print(f'Quantized: {transformed_cell_quant.flatten()}') + dequantized_val = dequantize_data( + transformed_cell_quant, + self.output_gate_params.nonlinear_output_scale, + 0, + ) + print(f'Dequantized: {dequantized_val.flatten()}') + print(f'Float : {transformed_cell_float.flatten()}') + + print('###### Updated hidden state: ######') + print(f'Quantized: {gated_output_quant.flatten()}') + print( + f'Dequantized: {self.hidden_state_tensor.dequantized_data.flatten()}' + ) + print(f'Float : {gated_output_float.flatten()}') + + diff = abs(self.hidden_state_tensor.dequantized_data - + gated_output_float.flatten()) + max_diff_perc = diff / gated_output_float.flatten() * 100 + print(f'Max diff perc (%): {max_diff_perc}') + return gated_output_quant, gated_output_float + + def _calculate_effective_output_scale(self): + return (self.output_gate_params.nonlinear_output_scale * + self.modulation_gate_params.nonlinear_output_scale / + self.hidden_state_tensor.scale) + + def 
_calculate_effective_cell_scale(self): + return (self.input_gate_params.nonlinear_output_scale * + self.modulation_gate_params.nonlinear_output_scale / + self.cell_state_tensor.scale) + + def _apply_cell_clip(self): + cell_vals = self.cell_state_tensor.quantized_data + if (cell_vals.max() > self.quantized_cell_clip + or cell_vals.min() < -self.quantized_cell_clip): + print(f'WARNING: cell values clip to {self.quantized_cell_clip}!') + + self.cell_state_tensor.quantized_data = np.round( + np.clip(cell_vals, -self.quantized_cell_clip, + self.quantized_cell_clip)) diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.cc index f8b231349..0aabf678a 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.cc @@ -1,4 +1,4 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,1367 +13,149 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include -#include +// Integer version of unidirectional sequence lstm. Only the standard LSTM +// (defined in the keras LSTM layer, e.g., no peephole etc.) is supported here. +// Currently used by the 16 bits activation case only + +#include +#include -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" #include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/fully_connected.h" #include "tensorflow/lite/micro/kernels/kernel_util.h" #include "tensorflow/lite/micro/kernels/lstm_eval.h" #include "tensorflow/lite/micro/kernels/lstm_shared.h" -#include "tensorflow/lite/micro/kernels/micro_tensor_utils.h" -#include "tensorflow/lite/micro/micro_log.h" namespace tflite { namespace { +/*Helper Functions*/ -constexpr int scratch_index_size = 12; - -struct UnidirectionalSequenceLstmOpData { - // If the lstm is layer norm. - bool use_layer_norm; - // The scratch index. - int scratch_index[scratch_index_size]; - - int32_t row_sums_size; - int32_t* row_sums; - bool compute_row_sums = false; - - int32_t input_zero_point; - int32_t output_state_zero_point; - - IntegerLstmParameter integer_lstm_param; -}; - -TfLiteStatus PopulateQuantizedLstmParams8x8_16( - TfLiteContext* context, TfLiteNode* node, - IntegerLstmParameter* integer_lstm_param) { - MicroContext* micro_context = GetMicroContext(context); - - // Calculate quantized clip for projection and cell. 
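Both _apply_cell_clip above and the quantized clip computation in the kernel code below express the float cell_clip in the quantized cell-state domain before clamping. A minimal sketch, assuming an int16 cell state whose scale covers roughly [-8, 8]; the helper name and values are illustrative.

import numpy as np

def quantize_cell_clip(cell_clip, cell_state_scale, num_bits=16):
  # Express the float clip threshold in the quantized cell-state domain, then
  # saturate it to the integer range of the cell state type.
  quantized = cell_clip / cell_state_scale
  return int(np.clip(quantized, -2**(num_bits - 1), 2**(num_bits - 1) - 1))

cell_state_scale = 8.0 / 32768  # assumed int16 cell state covering roughly [-8, 8]
print(quantize_cell_clip(8.0, cell_state_scale))  # 32767 (saturated)
print(quantize_cell_clip(4.0, cell_state_scale))  # 16384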
- const auto* params = - static_cast(node->builtin_data); - const float cell_clip = params->cell_clip; - const float proj_clip = params->proj_clip; - - TfLiteTensor* cell_state = - micro_context->AllocateTempInputTensor(node, kLstmCellStateTensor); - TF_LITE_ENSURE(context, cell_state != nullptr); - TF_LITE_ENSURE(context, cell_state->is_variable); - TfLiteTensor* output_tensor = - micro_context->AllocateTempOutputTensor(node, kLstmOutputTensor); - - TF_LITE_ENSURE(context, - cell_state->quantization.type != kTfLiteNoQuantization); - auto* cell_state_params = - static_cast(cell_state->quantization.params); - TF_LITE_ENSURE(context, - output_tensor->quantization.type != kTfLiteNoQuantization); - auto* proj_params = static_cast( - output_tensor->quantization.params); - if (cell_clip > 0.0f) { - integer_lstm_param->quantized_cell_clip = static_cast(std::min( - std::max(cell_clip / cell_state_params->scale->data[0], -32768.0f), - 32767.0f)); - } else { - integer_lstm_param->quantized_cell_clip = 0; - } - if (proj_clip > 0.0f) { - integer_lstm_param->quantized_proj_clip = static_cast(std::min( - std::max(proj_clip / proj_params->scale->data[0], -128.0f), 127.0f)); - } else { - integer_lstm_param->quantized_proj_clip = 0; - } - - // Calculate effective scales. - UnidirectionalSequenceLstmOpData* op_data = - static_cast(node->user_data); - const bool use_layer_norm = op_data->use_layer_norm; - - TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kLstmInputTensor); - - TfLiteTensor* input_to_input_weights = micro_context->AllocateTempInputTensor( - node, kLstmInputToInputWeightsTensor); - TfLiteTensor* input_to_forget_weights = - micro_context->AllocateTempInputTensor(node, - kLstmInputToForgetWeightsTensor); - TfLiteTensor* input_to_cell_weights = micro_context->AllocateTempInputTensor( - node, kLstmInputToCellWeightsTensor); - TfLiteTensor* input_to_output_weights = - micro_context->AllocateTempInputTensor(node, - kLstmInputToOutputWeightsTensor); - - TfLiteTensor* recurrent_to_input_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToInputWeightsTensor); - TfLiteTensor* recurrent_to_forget_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToForgetWeightsTensor); - TfLiteTensor* recurrent_to_cell_weights = - micro_context->AllocateTempInputTensor(node, - kLstmRecurrentToCellWeightsTensor); - TfLiteTensor* recurrent_to_output_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToOutputWeightsTensor); - - TfLiteTensor* cell_to_input_weights = micro_context->AllocateTempInputTensor( - node, kLstmCellToInputWeightsTensor); - TfLiteTensor* cell_to_forget_weights = micro_context->AllocateTempInputTensor( - node, kLstmCellToForgetWeightsTensor); - TfLiteTensor* cell_to_output_weights = micro_context->AllocateTempInputTensor( - node, kLstmCellToOutputWeightsTensor); - - TfLiteTensor* input_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmInputLayerNormCoefficientsTensor); - TfLiteTensor* forget_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmForgetLayerNormCoefficientsTensor); - TfLiteTensor* cell_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmCellLayerNormCoefficientsTensor); - TfLiteTensor* output_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmOutputLayerNormCoefficientsTensor); - - TfLiteTensor* projection_weights = micro_context->AllocateTempInputTensor( - node, 
kLstmProjectionWeightsTensor); - - TfLiteTensor* output_state = - micro_context->AllocateTempInputTensor(node, kLstmOutputStateTensor); - TF_LITE_ENSURE(context, output_state != nullptr); - TF_LITE_ENSURE(context, output_state->is_variable); - - // Since we have already checked that weights are all there or none, we can - // check the existence of only one to get the condition. - const bool use_cifg = (input_to_input_weights == nullptr); - const bool use_peephole = (cell_to_output_weights != nullptr); - const bool use_projection = (projection_weights != nullptr); - - // Get intermediate scales and zero points. - float intermediate_scale[5]; - int32_t intermediate_zp[5]; - for (int i = 0; i < 4; ++i) { - if (use_layer_norm) { - TfLiteTensor* intermediate = - micro_context->AllocateTempIntermediateTensor(node, i); - TF_LITE_ENSURE(context, - intermediate->quantization.type != kTfLiteNoQuantization); - auto* params_intermediate = static_cast( - intermediate->quantization.params); - intermediate_scale[i] = params_intermediate->scale->data[0]; - intermediate_zp[i] = params_intermediate->zero_point->data[0]; - if (intermediate != nullptr) { - micro_context->DeallocateTempTfLiteTensor(intermediate); - } - } else { - // Q3.12 for activation functions. - intermediate_scale[i] = std::pow(2.0f, -12.0f); - intermediate_zp[i] = 0; - } - } - // In the absence of projection, hidden becomes otuput and this intermediate - // is ignored. - TfLiteTensor* hidden = micro_context->AllocateTempIntermediateTensor(node, 4); - TF_LITE_ENSURE(context, hidden->quantization.type != kTfLiteNoQuantization); - auto* hidden_params = - static_cast(hidden->quantization.params); - intermediate_scale[4] = hidden_params->scale->data[0]; - intermediate_zp[4] = hidden_params->zero_point->data[0]; - if (hidden != nullptr) { - micro_context->DeallocateTempTfLiteTensor(hidden); - } - - // Scales. - const float default_scale = 1.0; - float input_scale = default_scale; - float input_to_input_weight_scale = default_scale; - float recurrent_to_input_weight_scale = default_scale; - float cell_to_input_weight_scale = default_scale; - float input_to_forget_weight_scale = default_scale; - float recurrent_to_forget_weight_scale = default_scale; - float cell_to_forget_weight_scale = default_scale; - float input_to_cell_weight_scale = default_scale; - float recurrent_to_cell_weight_scale = default_scale; - float input_to_output_weight_scale = default_scale; - float recurrent_to_output_weight_scale = default_scale; - float cell_to_output_weight_scale = default_scale; - float projection_weight_scale = default_scale; - float layer_norm_input_scale = default_scale; - float layer_norm_forget_scale = default_scale; - float layer_norm_cell_scale = default_scale; - float layer_norm_output_scale = default_scale; - float output_state_scale = default_scale; - int cell_scale = 1; - - // Effective scales. 
- float effective_input_to_input_scale = default_scale; - float effective_recurrent_to_input_scale = default_scale; - float effective_cell_to_input_scale = default_scale; - float effective_input_to_forget_scale = default_scale; - float effective_recurrent_to_forget_scale = default_scale; - float effective_cell_to_forget_scale = default_scale; - float effective_input_to_cell_scale = default_scale; - float effective_recurrent_to_cell_scale = default_scale; - float effective_input_to_output_scale = default_scale; - float effective_recurrent_to_output_scale = default_scale; - float effective_cell_to_output_scale = default_scale; - float effective_proj_scale = default_scale; - float effective_hidden_scale = default_scale; - - // Populate scales. - if (!use_cifg) { - input_to_input_weight_scale = input_to_input_weights->params.scale; - recurrent_to_input_weight_scale = recurrent_to_input_weights->params.scale; - } - - if (use_peephole) { - if (!use_cifg) { - cell_to_input_weight_scale = cell_to_input_weights->params.scale; - } - cell_to_forget_weight_scale = cell_to_forget_weights->params.scale; - cell_to_output_weight_scale = cell_to_output_weights->params.scale; - } - - if (use_layer_norm) { - if (!use_cifg) { - layer_norm_input_scale = input_layer_norm_coefficients->params.scale; - } - layer_norm_forget_scale = forget_layer_norm_coefficients->params.scale; - layer_norm_cell_scale = cell_layer_norm_coefficients->params.scale; - layer_norm_output_scale = output_layer_norm_coefficients->params.scale; - } - - if (use_projection) { - projection_weight_scale = projection_weights->params.scale; - } - output_state_scale = output_state->params.scale; - - input_to_forget_weight_scale = input_to_forget_weights->params.scale; - input_to_cell_weight_scale = input_to_cell_weights->params.scale; - input_to_output_weight_scale = input_to_output_weights->params.scale; - recurrent_to_forget_weight_scale = recurrent_to_forget_weights->params.scale; - recurrent_to_cell_weight_scale = recurrent_to_cell_weights->params.scale; - recurrent_to_output_weight_scale = recurrent_to_output_weights->params.scale; - - // Check cell state (already used above) - TF_LITE_ENSURE(context, CheckedLog2(cell_state->params.scale, &cell_scale)); - // TF_LITE_ENSURE(context, cell_scale <= -9); - integer_lstm_param->cell_scale = cell_scale; - input_scale = input->params.scale; - - // Calculate effective scales. 
- if (!use_cifg) { - effective_input_to_input_scale = - input_to_input_weight_scale * input_scale / intermediate_scale[0]; - effective_recurrent_to_input_scale = recurrent_to_input_weight_scale * - output_state_scale / - intermediate_scale[0]; - } - effective_input_to_forget_scale = - input_to_forget_weight_scale * input_scale / intermediate_scale[1]; - effective_recurrent_to_forget_scale = recurrent_to_forget_weight_scale * - output_state_scale / - intermediate_scale[1]; - - effective_input_to_cell_scale = - input_to_cell_weight_scale * input_scale / intermediate_scale[2]; - effective_recurrent_to_cell_scale = recurrent_to_cell_weight_scale * - output_state_scale / - intermediate_scale[2]; - - effective_input_to_output_scale = - input_to_output_weight_scale * input_scale / intermediate_scale[3]; - effective_recurrent_to_output_scale = recurrent_to_output_weight_scale * - output_state_scale / - intermediate_scale[3]; - - effective_hidden_scale = - std::pow(2.0f, -15.0f) / intermediate_scale[4] * std::pow(2.0f, -15.0f); - - effective_proj_scale = - projection_weight_scale * intermediate_scale[4] / output_state_scale; - - if (use_peephole) { - if (!use_cifg) { - effective_cell_to_input_scale = - std::pow(2.0f, static_cast(cell_scale)) * - cell_to_input_weight_scale / intermediate_scale[0]; - } - effective_cell_to_forget_scale = - std::pow(2.0f, static_cast(cell_scale)) * - cell_to_forget_weight_scale / intermediate_scale[1]; - effective_cell_to_output_scale = - std::pow(2.0f, static_cast(cell_scale)) * - cell_to_output_weight_scale / intermediate_scale[3]; - } - - // Decompose scales. - int shift_output; - QuantizeMultiplier(static_cast(effective_input_to_input_scale), - &integer_lstm_param->effective_input_to_input_scale_a, - &shift_output); - integer_lstm_param->effective_input_to_input_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_recurrent_to_input_scale), - &integer_lstm_param->effective_recurrent_to_input_scale_a, - &shift_output); - integer_lstm_param->effective_recurrent_to_input_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_cell_to_input_scale), - &integer_lstm_param->effective_cell_to_input_scale_a, - &shift_output); - integer_lstm_param->effective_cell_to_input_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_input_to_forget_scale), - &integer_lstm_param->effective_input_to_forget_scale_a, - &shift_output); - integer_lstm_param->effective_input_to_forget_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_recurrent_to_forget_scale), - &integer_lstm_param->effective_recurrent_to_forget_scale_a, - &shift_output); - integer_lstm_param->effective_recurrent_to_forget_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_cell_to_forget_scale), - &integer_lstm_param->effective_cell_to_forget_scale_a, - &shift_output); - integer_lstm_param->effective_cell_to_forget_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_input_to_cell_scale), - &integer_lstm_param->effective_input_to_cell_scale_a, - &shift_output); - integer_lstm_param->effective_input_to_cell_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_recurrent_to_cell_scale), - &integer_lstm_param->effective_recurrent_to_cell_scale_a, - &shift_output); - integer_lstm_param->effective_recurrent_to_cell_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_input_to_output_scale), - 
&integer_lstm_param->effective_input_to_output_scale_a, - &shift_output); - integer_lstm_param->effective_input_to_output_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_recurrent_to_output_scale), - &integer_lstm_param->effective_recurrent_to_output_scale_a, - &shift_output); - integer_lstm_param->effective_recurrent_to_output_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_cell_to_output_scale), - &integer_lstm_param->effective_cell_to_output_scale_a, - &shift_output); - integer_lstm_param->effective_cell_to_output_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_proj_scale), - &integer_lstm_param->effective_proj_scale_a, - &shift_output); - integer_lstm_param->effective_proj_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_hidden_scale), - &integer_lstm_param->effective_hidden_scale_a, - &shift_output); - integer_lstm_param->effective_hidden_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(layer_norm_input_scale), - &integer_lstm_param->layer_norm_input_scale_a, - &shift_output); - integer_lstm_param->layer_norm_input_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(layer_norm_forget_scale), - &integer_lstm_param->layer_norm_forget_scale_a, - &shift_output); - integer_lstm_param->layer_norm_forget_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(layer_norm_cell_scale), - &integer_lstm_param->layer_norm_cell_scale_a, - &shift_output); - integer_lstm_param->layer_norm_cell_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(layer_norm_output_scale), - &integer_lstm_param->layer_norm_output_scale_a, - &shift_output); - integer_lstm_param->layer_norm_output_scale_b = - static_cast(shift_output); - - integer_lstm_param->hidden_zp = intermediate_zp[4]; - - // 10000 is used to make sure the kernel logic does not overflow. 
- if (!use_cifg) { - integer_lstm_param->input_variance_guard = - std::max(1, static_cast(10000 * layer_norm_input_scale)); - } - integer_lstm_param->forget_variance_guard = - std::max(1, static_cast(10000 * layer_norm_forget_scale)); - integer_lstm_param->cell_variance_guard = - std::max(1, static_cast(10000 * layer_norm_cell_scale)); - integer_lstm_param->output_variance_guard = - std::max(1, static_cast(10000 * layer_norm_output_scale)); - - if (cell_state != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_state); - } - if (output_tensor != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output_tensor); - } - if (input != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input); - } - if (input_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_input_weights); - } - if (input_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_forget_weights); - } - if (input_to_cell_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_cell_weights); - } - if (input_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_output_weights); - } - if (recurrent_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_input_weights); - } - if (recurrent_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_forget_weights); - } - if (recurrent_to_cell_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_cell_weights); - } - if (recurrent_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_output_weights); - } - if (cell_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_to_input_weights); - } - if (cell_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_to_forget_weights); - } - if (cell_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_to_output_weights); - } - if (input_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_layer_norm_coefficients); - } - if (forget_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(forget_layer_norm_coefficients); - } - if (cell_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_layer_norm_coefficients); - } - if (output_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output_layer_norm_coefficients); - } - if (projection_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(projection_weights); - } - if (output_state != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output_state); - } - - return kTfLiteOk; -} - -// Temporary buffers used for hybrid mode -enum HybridTempBuffer { - kPrimaryScratchBuffer = 0, - kInputQuantized = 1, - kOutputStateQuantized = 2, - kCellStateQuantized = 3, - kInputScalingFactors = 4, - kOutputStateScalingFactors = 5, - kProductScalingFactors = 6, - kRecoveredCellWeights = 7, - kAccumScratch = 8, - kInputZeroPoints = 9, - kOutputStateZeroPoints = 10, - kScales = 11, - kNumHybridTempBuffers = 12, -}; +/*Kernel functions*/ void* UnidirectionalSequenceLstmInit(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - return context->AllocatePersistentBuffer( - context, sizeof(UnidirectionalSequenceLstmOpData)); -} - -// Check that input tensor dimensions matches with each other. 
-TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context, - TfLiteNode* node, int n_input, - int n_output, int n_cell, - bool use_layer_norm, bool is_integer) { - MicroContext* micro_context = GetMicroContext(context); - - const auto* params = reinterpret_cast(node->builtin_data); - - // Making sure clipping parameters have valid values. - // == 0 means no clipping - // > 0 means clipping - TF_LITE_ENSURE(context, params->cell_clip >= 0); - TF_LITE_ENSURE(context, params->proj_clip >= 0); - - TfLiteTensor* input_to_input_weights = micro_context->AllocateTempInputTensor( - node, kLstmInputToInputWeightsTensor); - if (input_to_input_weights != nullptr) { - TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->data[0], n_cell); - TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->data[1], n_input); - } - - TfLiteTensor* input_to_forget_weights = - micro_context->AllocateTempInputTensor(node, - kLstmInputToForgetWeightsTensor); - TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->data[0], n_cell); - TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->data[1], n_input); - - TfLiteTensor* input_to_cell_weights = micro_context->AllocateTempInputTensor( - node, kLstmInputToCellWeightsTensor); - TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->data[0], n_cell); - TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->data[1], n_input); - - TfLiteTensor* recurrent_to_input_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToInputWeightsTensor); - if (recurrent_to_input_weights != nullptr) { - TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->dims->data[0], - n_cell); - TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->dims->data[1], - n_output); - } - - TfLiteTensor* recurrent_to_forget_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToForgetWeightsTensor); - TF_LITE_ENSURE_EQ(context, recurrent_to_forget_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, recurrent_to_forget_weights->dims->data[0], - n_cell); - TF_LITE_ENSURE_EQ(context, recurrent_to_forget_weights->dims->data[1], - n_output); - - TfLiteTensor* recurrent_to_cell_weights = - micro_context->AllocateTempInputTensor(node, - kLstmRecurrentToCellWeightsTensor); - TF_LITE_ENSURE_EQ(context, recurrent_to_cell_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, recurrent_to_cell_weights->dims->data[0], n_cell); - TF_LITE_ENSURE_EQ(context, recurrent_to_cell_weights->dims->data[1], - n_output); - - // We make sure the input-gate's parameters are either both present (regular - // LSTM) or not at all (CIFG-LSTM). - const bool cifg_weights_all_or_none = - ((input_to_input_weights != nullptr) && - (recurrent_to_input_weights != nullptr)) || - ((input_to_input_weights == nullptr) && - (recurrent_to_input_weights == nullptr)); - TF_LITE_ENSURE(context, cifg_weights_all_or_none == true); - - TfLiteTensor* cell_to_input_weights = micro_context->AllocateTempInputTensor( - node, kLstmCellToInputWeightsTensor); - if (cell_to_input_weights != nullptr) { - TF_LITE_ENSURE_EQ(context, cell_to_input_weights->dims->size, 1); - TF_LITE_ENSURE_EQ(context, cell_to_input_weights->dims->data[0], n_cell); - TF_LITE_ENSURE_TYPES_EQ( - context, cell_to_input_weights->type, - is_integer ? 
kTfLiteInt16 : input_to_forget_weights->type); - } - - TfLiteTensor* cell_to_forget_weights = micro_context->AllocateTempInputTensor( - node, kLstmCellToForgetWeightsTensor); - if (cell_to_forget_weights != nullptr) { - TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->dims->size, 1); - TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->dims->data[0], n_cell); - TF_LITE_ENSURE_TYPES_EQ( - context, cell_to_forget_weights->type, - is_integer ? kTfLiteInt16 : input_to_forget_weights->type); - } - - TfLiteTensor* cell_to_output_weights = micro_context->AllocateTempInputTensor( - node, kLstmCellToOutputWeightsTensor); - if (cell_to_output_weights != nullptr) { - TF_LITE_ENSURE_EQ(context, cell_to_output_weights->dims->size, 1); - TF_LITE_ENSURE_EQ(context, cell_to_output_weights->dims->data[0], n_cell); - TF_LITE_ENSURE_TYPES_EQ( - context, cell_to_output_weights->type, - is_integer ? kTfLiteInt16 : input_to_forget_weights->type); - } - - // Making sure the peephole weights are there all or none. - const bool use_cifg = (input_to_input_weights == nullptr); - const bool peephole_weights_all_or_none = - ((cell_to_input_weights != nullptr || use_cifg) && - (cell_to_forget_weights != nullptr) && - (cell_to_output_weights != nullptr)) || - ((cell_to_input_weights == nullptr) && - (cell_to_forget_weights == nullptr) && - (cell_to_output_weights == nullptr)); - TF_LITE_ENSURE(context, peephole_weights_all_or_none == true); - - // Make sure the input gate bias is present only when not a CIFG-LSTM. - TfLiteTensor* input_gate_bias = - micro_context->AllocateTempInputTensor(node, kLstmInputGateBiasTensor); - if (use_cifg) { - TF_LITE_ENSURE_EQ(context, input_gate_bias, nullptr); - } else { - TF_LITE_ENSURE_EQ(context, input_gate_bias->dims->size, 1); - TF_LITE_ENSURE_EQ(context, input_gate_bias->dims->data[0], n_cell); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, input_gate_bias->type, kTfLiteInt32); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, input_gate_bias->type, kTfLiteFloat32); - } - } - - TfLiteTensor* forget_gate_bias = - micro_context->AllocateTempInputTensor(node, kLstmForgetGateBiasTensor); - TF_LITE_ENSURE_EQ(context, forget_gate_bias->dims->size, 1); - TF_LITE_ENSURE_EQ(context, forget_gate_bias->dims->data[0], n_cell); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, forget_gate_bias->type, kTfLiteInt32); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, forget_gate_bias->type, kTfLiteFloat32); - } - - TfLiteTensor* cell_gate_bias = - micro_context->AllocateTempInputTensor(node, kLstmCellGateBiasTensor); - TF_LITE_ENSURE_EQ(context, cell_gate_bias->dims->size, 1); - TF_LITE_ENSURE_EQ(context, cell_gate_bias->dims->data[0], n_cell); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, cell_gate_bias->type, kTfLiteInt32); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, cell_gate_bias->type, kTfLiteFloat32); - } - - TfLiteTensor* output_gate_bias = - micro_context->AllocateTempInputTensor(node, kLstmOutputGateBiasTensor); - TF_LITE_ENSURE_EQ(context, output_gate_bias->dims->size, 1); - TF_LITE_ENSURE_EQ(context, output_gate_bias->dims->data[0], n_cell); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, output_gate_bias->type, kTfLiteInt32); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, output_gate_bias->type, kTfLiteFloat32); - } - - TfLiteTensor* projection_weights = micro_context->AllocateTempInputTensor( - node, kLstmProjectionWeightsTensor); - if (projection_weights != nullptr) { - TF_LITE_ENSURE_EQ(context, projection_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, 
projection_weights->dims->data[0], n_output); - TF_LITE_ENSURE_EQ(context, projection_weights->dims->data[1], n_cell); - } - - TfLiteTensor* projection_bias = - micro_context->AllocateTempInputTensor(node, kLstmProjectionBiasTensor); - if (projection_bias != nullptr) { - TF_LITE_ENSURE_EQ(context, projection_bias->dims->size, 1); - TF_LITE_ENSURE_EQ(context, projection_bias->dims->data[0], n_output); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, projection_bias->type, kTfLiteInt32); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, projection_bias->type, kTfLiteFloat32); - } - } - - // Making sure the projection tensors are consistent: - // 1) If projection weight is not present, then projection bias should not be - // present. - // 2) If projection weight is present, then projection bias is optional. - const bool projecton_tensors_consistent = - ((projection_weights != nullptr) || (projection_bias == nullptr)); - TF_LITE_ENSURE(context, projecton_tensors_consistent == true); - - if (use_layer_norm) { - TfLiteTensor* input_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmInputLayerNormCoefficientsTensor); - if (use_cifg) { - TF_LITE_ENSURE_EQ(context, input_layer_norm_coefficients, nullptr); - } else { - TF_LITE_ENSURE(context, input_layer_norm_coefficients != nullptr); - TF_LITE_ENSURE_EQ(context, input_layer_norm_coefficients->dims->size, 1); - TF_LITE_ENSURE_EQ(context, input_layer_norm_coefficients->dims->data[0], - n_cell); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, input_layer_norm_coefficients->type, - kTfLiteInt16); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, input_layer_norm_coefficients->type, - kTfLiteFloat32); - } - } - - TfLiteTensor* forget_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmForgetLayerNormCoefficientsTensor); - TF_LITE_ENSURE_EQ(context, forget_layer_norm_coefficients->dims->size, 1); - TF_LITE_ENSURE_EQ(context, forget_layer_norm_coefficients->dims->data[0], - n_cell); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, forget_layer_norm_coefficients->type, - kTfLiteInt16); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, forget_layer_norm_coefficients->type, - kTfLiteFloat32); - } - - TfLiteTensor* cell_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmCellLayerNormCoefficientsTensor); - TF_LITE_ENSURE_EQ(context, cell_layer_norm_coefficients->dims->size, 1); - TF_LITE_ENSURE_EQ(context, cell_layer_norm_coefficients->dims->data[0], - n_cell); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, cell_layer_norm_coefficients->type, - kTfLiteInt16); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, cell_layer_norm_coefficients->type, - kTfLiteFloat32); - } - - TfLiteTensor* output_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmOutputLayerNormCoefficientsTensor); - TF_LITE_ENSURE_EQ(context, output_layer_norm_coefficients->dims->size, 1); - TF_LITE_ENSURE_EQ(context, output_layer_norm_coefficients->dims->data[0], - n_cell); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, output_layer_norm_coefficients->type, - kTfLiteInt16); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, output_layer_norm_coefficients->type, - kTfLiteFloat32); - } - if (input_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_layer_norm_coefficients); - } - if (forget_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(forget_layer_norm_coefficients); - } - if 
(cell_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_layer_norm_coefficients); - } - if (output_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output_layer_norm_coefficients); - } - } - - if (input_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_input_weights); - } - if (input_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_forget_weights); - } - if (input_to_cell_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_cell_weights); - } - if (recurrent_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_input_weights); - } - if (recurrent_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_forget_weights); - } - micro_context->DeallocateTempTfLiteTensor(recurrent_to_cell_weights); - if (cell_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_to_input_weights); - } - if (cell_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_to_forget_weights); - } - if (cell_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_to_output_weights); - } - if (input_gate_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_gate_bias); - } - if (forget_gate_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(forget_gate_bias); - } - if (cell_gate_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_gate_bias); - } - if (output_gate_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output_gate_bias); - } - if (projection_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(projection_weights); - } - if (projection_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(projection_bias); - } - - return kTfLiteOk; -} - -TfLiteStatus PrecomputeZeroPointTimesWeightWithBias( - TfLiteContext* context, int32_t zero_point, - const TfLiteTensor* weight_tensor, const TfLiteTensor* bias_tensor, - int32_t** output) { - if (weight_tensor == nullptr) { - return kTfLiteOk; - } - - const RuntimeShape& weight_shape = GetTensorShape(weight_tensor); - TF_LITE_ENSURE_EQ(context, weight_shape.DimensionsCount(), 2); - const int row = weight_shape.Dims(0); - const int col = weight_shape.Dims(1); - TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - *output = static_cast( - context->AllocatePersistentBuffer(context, row * sizeof(int32_t))); - - if (bias_tensor == nullptr) { - memset(*output, 0, row * sizeof(int32_t)); - } else { - const int32_t* bias = GetTensorData(bias_tensor); - memcpy(*output, bias, row * sizeof(int32_t)); - } - if (zero_point != 0) { - const int8_t* weight = GetTensorData(weight_tensor); - tflite::tensor_utils::MatrixScalarMultiplyAccumulate(weight, zero_point, - row, col, *output); - } - return kTfLiteOk; + return context->AllocatePersistentBuffer(context, sizeof(OpDataLSTM)); } -TfLiteStatus PopulatePrecomputedZPTimesWeightsWithBias( - TfLiteContext* context, UnidirectionalSequenceLstmOpData* op_data, - TfLiteNode* node) { - MicroContext* micro_context = GetMicroContext(context); - - TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kLstmInputTensor); - TfLiteTensor* output_state = - micro_context->AllocateTempInputTensor(node, kLstmOutputStateTensor); - TF_LITE_ENSURE(context, output_state != nullptr); - TF_LITE_ENSURE(context, output_state->is_variable); - - const int32_t 
input_zero_point = -input->params.zero_point; - const int32_t output_state_zero_point = -output_state->params.zero_point; - - TfLiteTensor* input_to_input_weights = micro_context->AllocateTempInputTensor( - node, kLstmInputToInputWeightsTensor); - TfLiteTensor* input_to_forget_weights = - micro_context->AllocateTempInputTensor(node, - kLstmInputToForgetWeightsTensor); - TfLiteTensor* input_to_cell_weights = micro_context->AllocateTempInputTensor( - node, kLstmInputToCellWeightsTensor); - TfLiteTensor* input_to_output_weights = - micro_context->AllocateTempInputTensor(node, - kLstmInputToOutputWeightsTensor); - - TfLiteTensor* recurrent_to_input_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToInputWeightsTensor); - TfLiteTensor* recurrent_to_forget_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToForgetWeightsTensor); - TfLiteTensor* recurrent_to_cell_weights = - micro_context->AllocateTempInputTensor(node, - kLstmRecurrentToCellWeightsTensor); - TfLiteTensor* recurrent_to_output_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToOutputWeightsTensor); - - TfLiteTensor* projection_weights = micro_context->AllocateTempInputTensor( - node, kLstmProjectionWeightsTensor); - TfLiteTensor* projection_bias = - micro_context->AllocateTempInputTensor(node, kLstmProjectionBiasTensor); - - IntegerLstmParameter* integer_lstm_params = &op_data->integer_lstm_param; - - TfLiteTensor* intermediate = - micro_context->AllocateTempIntermediateTensor(node, 4); - TF_LITE_ENSURE(context, - intermediate->quantization.type != kTfLiteNoQuantization); - const auto* params = - static_cast(intermediate->quantization.params); - const int32_t hidden_zp = params->zero_point->data[0]; - - // Get bias and perform zero point calculation. - // When there is layer normalization, the gate bias does not apply to matmul - // directly: - // y = ln(w * x + w * r + w * c) + b. - const bool is_layer_norm = op_data->use_layer_norm; - - // Forget gate. - TfLiteTensor* forget_gate_bias = is_layer_norm - ? nullptr - : micro_context->AllocateTempInputTensor( - node, kLstmForgetGateBiasTensor); - TF_LITE_ENSURE_OK( - context, - PrecomputeZeroPointTimesWeightWithBias( - context, input_zero_point, input_to_forget_weights, forget_gate_bias, - &(integer_lstm_params->input_to_forget_effective_bias))); - - TF_LITE_ENSURE_OK( - context, - PrecomputeZeroPointTimesWeightWithBias( - context, output_state_zero_point, recurrent_to_forget_weights, - nullptr, &(integer_lstm_params->recurrent_to_forget_effective_bias))); - - // Modulation gate. - TfLiteTensor* cell_gate_bias = is_layer_norm - ? nullptr - : micro_context->AllocateTempInputTensor( - node, kLstmCellGateBiasTensor); - TF_LITE_ENSURE_OK( - context, - PrecomputeZeroPointTimesWeightWithBias( - context, input_zero_point, input_to_cell_weights, cell_gate_bias, - &(integer_lstm_params->input_to_cell_effective_bias))); - TF_LITE_ENSURE_OK( - context, - PrecomputeZeroPointTimesWeightWithBias( - context, output_state_zero_point, recurrent_to_cell_weights, nullptr, - &(integer_lstm_params->recurrent_to_cell_effective_bias))); - - // Output gate. - TfLiteTensor* output_gate_bias = is_layer_norm - ? 
nullptr - : micro_context->AllocateTempInputTensor( - node, kLstmOutputGateBiasTensor); - TF_LITE_ENSURE_OK( - context, - PrecomputeZeroPointTimesWeightWithBias( - context, input_zero_point, input_to_output_weights, output_gate_bias, - &(integer_lstm_params->input_to_output_effective_bias))); - - TF_LITE_ENSURE_OK( - context, - PrecomputeZeroPointTimesWeightWithBias( - context, output_state_zero_point, recurrent_to_output_weights, - nullptr, &(integer_lstm_params->recurrent_to_output_effective_bias))); - - // Input gate. The calculation is only meaningful for non-cifg case. - TfLiteTensor* input_gate_bias = is_layer_norm - ? nullptr - : micro_context->AllocateTempInputTensor( - node, kLstmInputGateBiasTensor); - TF_LITE_ENSURE_OK( - context, - PrecomputeZeroPointTimesWeightWithBias( - context, input_zero_point, input_to_input_weights, input_gate_bias, - &(integer_lstm_params->input_to_input_effective_bias))); - TF_LITE_ENSURE_OK( - context, - PrecomputeZeroPointTimesWeightWithBias( - context, output_state_zero_point, recurrent_to_input_weights, nullptr, - &(integer_lstm_params->recurrent_to_input_effective_bias))); - - // Projection bias. The calculation is only meaningful for with projection. - TF_LITE_ENSURE_OK(context, - PrecomputeZeroPointTimesWeightWithBias( - context, hidden_zp, projection_weights, projection_bias, - &(integer_lstm_params->projection_effective_bias))); - - if (input != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input); - } - if (output_state != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output_state); - } - if (input_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_input_weights); - } - if (input_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_forget_weights); - } - if (input_to_cell_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_cell_weights); - } - if (input_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_output_weights); - } - if (recurrent_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_input_weights); - } - if (recurrent_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_forget_weights); - } - if (recurrent_to_cell_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_cell_weights); - } - if (recurrent_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_output_weights); - } - if (projection_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(projection_weights); - } - if (projection_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(projection_bias); - } - if (forget_gate_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(forget_gate_bias); - } - if (cell_gate_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_gate_bias); - } - if (output_gate_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output_gate_bias); - } - if (input_gate_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_gate_bias); - } - - if (intermediate != nullptr) { - micro_context->DeallocateTempTfLiteTensor(intermediate); - } - - return kTfLiteOk; -} - -// Resize the output and state tensors based on the sizes of the input tensors. -// Allocate a temporary scratch tensor. Also check that the sizes of the input -// tensors match each other. 
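PrecomputeZeroPointTimesWeightWithBias and the PopulatePrecomputedZPTimesWeightsWithBias calls above fold the negated input/state zero points into per-row effective biases, effective_bias[r] = bias[r] + zero_point * sum_c weight[r][c], so the integer matmul in Eval does not have to subtract offsets element by element. A self-contained restatement of that fold (hypothetical helper, not the library routine, which accumulates via tensor_utils::MatrixScalarMultiplyAccumulate into a persistent buffer):

#include <cstdint>
#include <vector>

// weight: rows x cols int8 matrix, row-major; bias may be null.
// Illustrative only; shows the arithmetic, not the TFLite-Micro code path.
std::vector<int32_t> PrecomputeEffectiveBias(const int8_t* weight, int rows,
                                             int cols, const int32_t* bias,
                                             int32_t zero_point) {
  std::vector<int32_t> effective_bias(rows, 0);
  for (int r = 0; r < rows; ++r) {
    int32_t row_sum = 0;
    for (int c = 0; c < cols; ++c) {
      row_sum += weight[r * cols + c];
    }
    effective_bias[r] = (bias != nullptr ? bias[r] : 0) + zero_point * row_sum;
  }
  return effective_bias;
}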
TfLiteStatus UnidirectionalSequenceLstmPrepare(TfLiteContext* context, TfLiteNode* node) { - UnidirectionalSequenceLstmOpData* op_data = - reinterpret_cast<UnidirectionalSequenceLstmOpData*>(node->user_data); - - MicroContext* micro_context = GetMicroContext(context); - - // Check we have all the inputs and outputs we need. - bool use_layer_norm = false; - if (node->inputs->size == 24) { - TfLiteTensor* forget_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmForgetLayerNormCoefficientsTensor); - if (forget_layer_norm_coefficients == nullptr) { - use_layer_norm = false; - } else { - use_layer_norm = true; - } - if (forget_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(forget_layer_norm_coefficients); - } - } else if (node->inputs->size == 20) { - // This is deprecated and is only kept here for backward compatibility. - use_layer_norm = false; - } else { - MicroPrintf("The LSTM Full kernel expects 20 or 24 inputs. Got %d inputs", - node->inputs->size); - return kTfLiteError; - } TF_LITE_ENSURE_EQ(context, node->outputs->size, 1); - op_data->use_layer_norm = use_layer_norm; - - // Inferring batch size, number of outputs and sequence length and - // number of cells from the input tensors. - TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kLstmInputTensor); - op_data->input_zero_point = input->params.zero_point; - const bool is_integer = input->type == kTfLiteInt8; - TF_LITE_ENSURE(context, input->dims->size > 1); - const auto* params = - reinterpret_cast<TfLiteUnidirectionalSequenceLSTMParams*>( - node->builtin_data); - const bool time_major = params->time_major; - const int n_batch = time_major ? input->dims->data[1] : input->dims->data[0]; - const int n_input = input->dims->data[2]; + TF_LITE_ENSURE_EQ(context, node->inputs->size, 24); - TfLiteTensor* input_to_output_weights = - micro_context->AllocateTempInputTensor(node, - kLstmInputToOutputWeightsTensor); - const int n_cell = input_to_output_weights->dims->data[0]; - TF_LITE_ENSURE_EQ(context, input_to_output_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, input_to_output_weights->dims->data[1], n_input); + TFLITE_DCHECK(node->builtin_data != nullptr); + TFLITE_DCHECK(node->user_data != nullptr); - TfLiteTensor* recurrent_to_output_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToOutputWeightsTensor); - TF_LITE_ENSURE_EQ(context, recurrent_to_output_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, recurrent_to_output_weights->dims->data[0], - n_cell); - const int n_output = recurrent_to_output_weights->dims->data[1]; - - // Check that input tensor dimensions matches with each other. + OpDataLSTM* op_data = reinterpret_cast<OpDataLSTM*>(node->user_data); + const auto* builtin_data = + static_cast<TfLiteUnidirectionalSequenceLSTMParams*>(node->builtin_data); + // All TempTfLiteTensors will be deallocated through the destructor. + LstmTensors lstm_tensors(context, node); + TF_LITE_ENSURE_OK(context, lstm_tensors.ValidateTensorStatus(context)); + + op_data->cell_gate_nonlinear_type = builtin_data->activation; + op_data->size_info = + CreateLstmSizeInfo(builtin_data->time_major, + lstm_tensors.GetInternalTensor(kLstmInputTensor)->dims, + lstm_tensors.HiddenStateTensor()->dims); TF_LITE_ENSURE_OK( - context, CheckInputTensorDimensions(context, node, n_input, n_output, - n_cell, use_layer_norm, is_integer)); - - // Get the pointer to output, output_state and cell_state buffer tensors.
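CreateLstmSizeInfo in the replacement Prepare takes over the dimension inference that the deleted lines did by hand: with time_major the input is laid out [time, batch, feature], otherwise [batch, time, feature], and the state dimension comes from the hidden-state tensor's last axis. A rough sketch of that bookkeeping, assuming a struct whose fields mirror the size_info members used elsewhere in this patch (batch_size, time_steps, input_dimension, state_dimension):

#include "tensorflow/lite/c/common.h"

// Field names follow the op_data->size_info usage in this patch; the struct
// layout itself is assumed for illustration.
struct LstmSizeInfoSketch {
  bool time_major;
  int batch_size;
  int time_steps;
  int input_dimension;
  int state_dimension;
};

LstmSizeInfoSketch CreateLstmSizeInfoSketch(bool time_major,
                                            const TfLiteIntArray* input_dims,
                                            const TfLiteIntArray* hidden_state_dims) {
  LstmSizeInfoSketch info;
  info.time_major = time_major;
  info.batch_size = time_major ? input_dims->data[1] : input_dims->data[0];
  info.time_steps = time_major ? input_dims->data[0] : input_dims->data[1];
  info.input_dimension = input_dims->data[2];
  info.state_dimension = hidden_state_dims->data[hidden_state_dims->size - 1];
  return info;
}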
- TfLiteTensor* output = - micro_context->AllocateTempOutputTensor(node, kLstmOutputTensor); - - TfLiteTensor* output_state = - micro_context->AllocateTempInputTensor(node, kLstmOutputStateTensor); - TF_LITE_ENSURE(context, output_state != nullptr); - TF_LITE_ENSURE(context, output_state->is_variable); - op_data->output_state_zero_point = output_state->params.zero_point; - TfLiteTensor* cell_state = - micro_context->AllocateTempInputTensor(node, kLstmCellStateTensor); - TF_LITE_ENSURE(context, cell_state != nullptr); - TF_LITE_ENSURE(context, cell_state->is_variable); - - // Check the shape of input state tensors. - // These tensor may be 1D or 2D. It's fine as long as the total size is - // correct. - TF_LITE_ENSURE_EQ(context, NumElements(output_state), n_batch * n_output); - TF_LITE_ENSURE_EQ(context, NumElements(cell_state), n_batch * n_cell); - - // Check the shape of output tensor against that of input tensor - TF_LITE_ENSURE_EQ(context, output->dims->size, 3); - TF_LITE_ENSURE_EQ(context, input->dims->data[0], output->dims->data[0]); - TF_LITE_ENSURE_EQ(context, input->dims->data[1], output->dims->data[1]); - TF_LITE_ENSURE_EQ(context, output->dims->data[2], n_output); - - if (is_integer) { - const int num_intermediate_tensors = node->intermediates->size; - TF_LITE_ENSURE(context, num_intermediate_tensors == 5); - } - - TfLiteTensor* input_to_input_weights = micro_context->AllocateTempInputTensor( - node, kLstmInputToInputWeightsTensor); - - const bool use_cifg = (input_to_input_weights == nullptr); - - // Create a primary scratch buffer for hybrid and float - // If is_integer, primary scratch buffer has a different size - if (!is_integer) { - int scratch_buffer_size[2]; - scratch_buffer_size[0] = n_batch; - - if (use_cifg) { - // Reserving space for Cell, Forget, Output gates - scratch_buffer_size[1] = n_cell * 3; - } else { - // Reserving space for Input, Cell, Forget, Output gates - scratch_buffer_size[1] = n_cell * 4; - } - - TF_LITE_ENSURE_OK(context, - context->RequestScratchBufferInArena( - context, - scratch_buffer_size[0] * scratch_buffer_size[1] * - TfLiteTypeGetSize(input->type), - &(op_data->scratch_index[kPrimaryScratchBuffer]))); - } - - if (is_integer) { - // Integer UnidirectionalSequenceLSTM prepare function for 8x8->16. - // This code path needs 5 intermediate tensors per Op. - // Populate quantization parameters. - PopulateQuantizedLstmParams8x8_16(context, node, - &op_data->integer_lstm_param); - // Allocate scratch buffer. Need 4 16-bit buffer with size n_batch * n_cell - // and 1 8-bit buffer with size n_batch * n_cell. For integer - // UnidirectionalSequenceLSTM, we do not need the extra 32-bit buffer. - for (int i = 0; i < 5; ++i) { - TfLiteType buffer_type = kTfLiteInt16; - - if (i == 4) { - buffer_type = kTfLiteInt8; - } - - TF_LITE_ENSURE_OK( - context, - context->RequestScratchBufferInArena( - context, n_batch * n_cell * TfLiteTypeGetSize(buffer_type), - &(op_data->scratch_index[i]))); - } - - // Populate precomputed zp * weight. 
- TF_LITE_ENSURE_OK(context, PopulatePrecomputedZPTimesWeightsWithBias( - context, op_data, node)); - } - - if (input != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input); - } - if (input_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_output_weights); - } - if (recurrent_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_output_weights); - } - if (output != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output); - } - if (output_state != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output_state); - } - if (cell_state != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_state); + context, ValidateTensorSize(context, lstm_tensors, op_data->size_info)); + + // Create cell state information and gate parameters (Fully Connected and Mul) + auto cell_state_type = + lstm_tensors.GetInternalTensor(kLstmCellStateTensor)->type; + if (cell_state_type == kTfLiteFloat32) { + op_data->cell_state_info = + CreateLstmCellStateInfoFloat(builtin_data->cell_clip); + TF_LITE_ENSURE_OK( + context, PrepareGateParametersFloat(context, lstm_tensors, op_data)); + } else if (cell_state_type == kTfLiteInt16) { + op_data->cell_state_info = CreateLstmCellStateInfo( + lstm_tensors.CellStateTensor()->params.scale, builtin_data->cell_clip); + TF_LITE_ENSURE_OK( + context, PrepareGateParametersInteger(context, lstm_tensors, op_data)); + } else { + MicroPrintf( + "Cell state type %s (%d) not supported. The quantized Unidirectional " + "Sequence LSTM Op only support int16 cell state", + TfLiteTypeGetName(cell_state_type), cell_state_type); + return kTfLiteError; } - - if (input_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_input_weights); + // request buffers (four buffers) + for (size_t i = 0; i < 4; i++) { + TF_LITE_ENSURE_OK(context, context->RequestScratchBufferInArena( + context, + op_data->size_info.batch_size * + op_data->size_info.state_dimension * + TfLiteTypeGetSize(cell_state_type), + &(op_data->buffer_indices[i]))); } return kTfLiteOk; } TfLiteStatus UnidirectionalSequenceLstmEval(TfLiteContext* context, TfLiteNode* node) { - TFLITE_DCHECK(context->GetScratchBuffer != nullptr); - - const auto* params = - reinterpret_cast( - node->builtin_data); - const UnidirectionalSequenceLstmOpData* op_data = - reinterpret_cast(node->user_data); - const bool use_layer_norm = op_data->use_layer_norm; - const bool time_major = params->time_major; - - const TfLiteEvalTensor* input = - tflite::micro::GetEvalInput(context, node, kLstmInputTensor); - - const TfLiteEvalTensor* input_to_input_weights = tflite::micro::GetEvalInput( - context, node, kLstmInputToInputWeightsTensor); - - const TfLiteEvalTensor* input_to_forget_weights = tflite::micro::GetEvalInput( - context, node, kLstmInputToForgetWeightsTensor); - - const TfLiteEvalTensor* input_to_cell_weights = - tflite::micro::GetEvalInput(context, node, kLstmInputToCellWeightsTensor); - - const TfLiteEvalTensor* input_to_output_weights = tflite::micro::GetEvalInput( - context, node, kLstmInputToOutputWeightsTensor); - - const TfLiteEvalTensor* recurrent_to_input_weights = - tflite::micro::GetEvalInput(context, node, - kLstmRecurrentToInputWeightsTensor); - - const TfLiteEvalTensor* recurrent_to_forget_weights = - tflite::micro::GetEvalInput(context, node, - kLstmRecurrentToForgetWeightsTensor); - - const TfLiteEvalTensor* recurrent_to_cell_weights = - tflite::micro::GetEvalInput(context, node, - kLstmRecurrentToCellWeightsTensor); 
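The four scratch buffers requested at the end of the new Prepare are gate-sized working areas, each holding batch_size * state_dimension elements of the cell-state type, so the whole request is easy to size by hand. A worked example with assumed numbers (batch 1, 64 cells, int16 cell state):

#include <cstddef>
#include <cstdint>

// Example figures only; real values come from op_data->size_info at runtime.
constexpr size_t kBatchSize = 1;
constexpr size_t kStateDimension = 64;
constexpr size_t kCellTypeSize = sizeof(int16_t);  // TfLiteTypeGetSize(kTfLiteInt16)
constexpr size_t kBytesPerBuffer = kBatchSize * kStateDimension * kCellTypeSize;  // 128
constexpr size_t kTotalScratchBytes = 4 * kBytesPerBuffer;                        // 512
static_assert(kTotalScratchBytes == 512, "four gate buffers for this example");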
- - const TfLiteEvalTensor* recurrent_to_output_weights = - tflite::micro::GetEvalInput(context, node, - kLstmRecurrentToOutputWeightsTensor); - - const TfLiteEvalTensor* cell_to_input_weights = - tflite::micro::GetEvalInput(context, node, kLstmCellToInputWeightsTensor); - - const TfLiteEvalTensor* cell_to_forget_weights = tflite::micro::GetEvalInput( - context, node, kLstmCellToForgetWeightsTensor); - - const TfLiteEvalTensor* cell_to_output_weights = tflite::micro::GetEvalInput( - context, node, kLstmCellToOutputWeightsTensor); - - const TfLiteEvalTensor* input_gate_bias = - tflite::micro::GetEvalInput(context, node, kLstmInputGateBiasTensor); + TFLITE_DCHECK(node->user_data != nullptr); + const OpDataLSTM& op_data = *reinterpret_cast<OpDataLSTM*>(node->user_data); + auto kernel_content = CreateLSTMKernelContent(context, node); - const TfLiteEvalTensor* forget_gate_bias = - tflite::micro::GetEvalInput(context, node, kLstmForgetGateBiasTensor); + const auto activation_type = + kernel_content.internal_tensors[kLstmInputTensor]->type; + const auto weight_type = + kernel_content.internal_tensors[kLstmInputToInputWeightsTensor]->type; - const TfLiteEvalTensor* cell_gate_bias = - tflite::micro::GetEvalInput(context, node, kLstmCellGateBiasTensor); - - const TfLiteEvalTensor* output_gate_bias = - tflite::micro::GetEvalInput(context, node, kLstmOutputGateBiasTensor); - - const TfLiteEvalTensor* projection_weights = - tflite::micro::GetEvalInput(context, node, kLstmProjectionWeightsTensor); - - const TfLiteEvalTensor* projection_bias = - tflite::micro::GetEvalInput(context, node, kLstmProjectionBiasTensor); - - TfLiteEvalTensor* output_state = - tflite::micro::GetMutableEvalInput(context, node, kLstmOutputStateTensor); - - TfLiteEvalTensor* cell_state = - tflite::micro::GetMutableEvalInput(context, node, kLstmCellStateTensor); - - TFLITE_DCHECK(cell_state != nullptr); - - const TfLiteEvalTensor* input_layer_norm_coefficients = - use_layer_norm ? tflite::micro::GetEvalInput( - context, node, kLstmInputLayerNormCoefficientsTensor) - : nullptr; - const TfLiteEvalTensor* forget_layer_norm_coefficients = - use_layer_norm - ? tflite::micro::GetEvalInput(context, node, - kLstmForgetLayerNormCoefficientsTensor) - : nullptr; - const TfLiteEvalTensor* cell_layer_norm_coefficients = - use_layer_norm ? tflite::micro::GetEvalInput( - context, node, kLstmCellLayerNormCoefficientsTensor) - : nullptr; - const TfLiteEvalTensor* output_layer_norm_coefficients = - use_layer_norm - ? tflite::micro::GetEvalInput(context, node, - kLstmOutputLayerNormCoefficientsTensor) - : nullptr; - - TfLiteEvalTensor* output = - tflite::micro::GetEvalOutput(context, node, kLstmOutputTensor); - - // Copy out the LSTM specific params so they can be passed in the function. - TfLiteLSTMParams lstm_params; - lstm_params.activation = params->activation; - lstm_params.cell_clip = params->cell_clip; - lstm_params.proj_clip = params->proj_clip; - lstm_params.asymmetric_quantize_inputs = params->asymmetric_quantize_inputs; - - switch (input_to_output_weights->type) { + switch (activation_type) { case kTfLiteFloat32: { - // Index the scratch buffers pointers to the global scratch buffer.
- return EvalFloatLstm( - input, input_to_input_weights, input_to_forget_weights, - input_to_cell_weights, input_to_output_weights, - recurrent_to_input_weights, recurrent_to_forget_weights, - recurrent_to_cell_weights, recurrent_to_output_weights, - cell_to_input_weights, cell_to_forget_weights, cell_to_output_weights, - input_layer_norm_coefficients, forget_layer_norm_coefficients, - cell_layer_norm_coefficients, output_layer_norm_coefficients, - /*aux_input=*/nullptr, - /*aux_input_to_input_weights=*/nullptr, - /*aux_input_to_forget_weights=*/nullptr, - /*aux_input_to_cell_weights=*/nullptr, - /*aux_input_to_output_weights=*/nullptr, input_gate_bias, - forget_gate_bias, cell_gate_bias, output_gate_bias, - projection_weights, projection_bias, &lstm_params, - /*forward_sequence=*/true, time_major, - /*output_offset=*/0, - reinterpret_cast<float*>(context->GetScratchBuffer( - context, op_data->scratch_index[kPrimaryScratchBuffer])), - output_state, cell_state, output); - } break; - case kTfLiteUInt8: + LSTMBuffers<float> buffers = + CreateLSTMBuffers<float>(context, op_data.buffer_indices); + EvalLstm<float, float, float, float>(op_data, kernel_content, buffers); + break; + } case kTfLiteInt8: { - return EvalInteger8x8_16Lstm( - input, input_to_input_weights, input_to_forget_weights, - input_to_cell_weights, input_to_output_weights, - recurrent_to_input_weights, recurrent_to_forget_weights, - recurrent_to_cell_weights, recurrent_to_output_weights, - cell_to_input_weights, cell_to_forget_weights, cell_to_output_weights, - input_layer_norm_coefficients, forget_layer_norm_coefficients, - cell_layer_norm_coefficients, output_layer_norm_coefficients, - input_gate_bias, forget_gate_bias, cell_gate_bias, output_gate_bias, - projection_weights, projection_bias, &lstm_params, - /*forward_sequence=*/true, time_major, &op_data->integer_lstm_param, - op_data->output_state_zero_point, output_state, cell_state, output, - reinterpret_cast<int16_t*>( - context->GetScratchBuffer(context, op_data->scratch_index[0])), - reinterpret_cast<int16_t*>( - context->GetScratchBuffer(context, op_data->scratch_index[1])), - reinterpret_cast<int16_t*>( - context->GetScratchBuffer(context, op_data->scratch_index[2])), - reinterpret_cast<int16_t*>( - context->GetScratchBuffer(context, op_data->scratch_index[3])), - reinterpret_cast<int8_t*>( - context->GetScratchBuffer(context, op_data->scratch_index[4])), - nullptr); - } break; - default: - MicroPrintf("Type %s is not currently supported.", - TfLiteTypeGetName(input_to_output_weights->type)); + switch (weight_type) { + case kTfLiteInt8: { + // 8(activation)x8(weight)->16(cell) LSTM with 32 bits bias + LSTMBuffers<int16_t> buffers = + CreateLSTMBuffers<int16_t>(context, op_data.buffer_indices); + EvalLstm<int8_t, int8_t, int16_t, int32_t>(op_data, kernel_content, + buffers); + break; + } + default: { + MicroPrintf("Filter type %s (%d) not supported.", + TfLiteTypeGetName(weight_type), activation_type); + return kTfLiteError; + } + } + break; + } + case kTfLiteInt16: { + switch (weight_type) { + case kTfLiteInt8: { + // 16(activation)x8(weight)->16(cell) LSTM with 64 bits bias + LSTMBuffers<int16_t> buffers = + CreateLSTMBuffers<int16_t>(context, op_data.buffer_indices); + EvalLstm<int16_t, int8_t, int16_t, int64_t>(op_data, kernel_content, + buffers); + break; + } + default: { + MicroPrintf("Filter type %s (%d) not supported.", + TfLiteTypeGetName(weight_type), weight_type); + return kTfLiteError; + } + } + break; + } + default: { + MicroPrintf("Input type %s (%d) not supported.", + TfLiteTypeGetName(activation_type), activation_type); return kTfLiteError; + } } + return kTfLiteOk; } } // namespace @@ -1383,5 +165,4 @@ TfLiteRegistration
Register_UNIDIRECTIONAL_SEQUENCE_LSTM() { UnidirectionalSequenceLstmPrepare, UnidirectionalSequenceLstmEval); } - } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.h new file mode 100644 index 000000000..00fa29e3a --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.h @@ -0,0 +1,47 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_UNIDIRECTIONAL_SEQUENCE_LSTM_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_UNIDIRECTIONAL_SEQUENCE_LSTM_H_ + +#include + +#include "tensorflow/lite/c/builtin_op_data.h" +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +// This is the most generic TfLiteRegistration. The actual supported types may +// still be target dependent. The only requirement is that every implementation +// (reference or optimized) must define this function. +// TODO(b/230666079): resolve conflict with xtensa implementation +TfLiteRegistration Register_UNIDIRECTIONAL_SEQUENCE_LSTM(); + +#if defined(CMSIS_NN) +// Returns a TfLiteRegistration struct for kernel variant that only supports +// int8 activations and int8 weights and uses the latency optimized +// implementations. +TfLiteRegistration Register_UNIDIRECTIONAL_SEQUENCE_LSTM_INT8(); + +#else +inline TfLiteRegistration Register_UNIDIRECTIONAL_SEQUENCE_LSTM_INT8() { + return Register_UNIDIRECTIONAL_SEQUENCE_LSTM(); +} +#endif + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_UNIDIRECTIONAL_SEQUENCE_LSTM_H_ diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/unpack.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/unpack.cc index b58df2e73..4ade8f3fd 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/unpack.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/unpack.cc @@ -1,4 +1,4 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -21,9 +21,7 @@ limitations under the License. 
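The new header above exposes two entry points: the generic registration and, on CMSIS-NN builds, an int8-only latency-optimized variant that degrades to the generic kernel elsewhere. Together with the resolver change later in this patch, which lets AddUnidirectionalSequenceLSTM accept an explicit registration, application-side wiring looks roughly like this (the resolver's op count and the choice of variant are placeholders):

#include "tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

// Sketch of how a model that only needs this op might register it.
void RegisterLstmOp(tflite::MicroMutableOpResolver<1>& resolver) {
  // Generic kernel; picks the float/int8/int16 paths at run time.
  resolver.AddUnidirectionalSequenceLSTM();
  // Or, for int8-only models on a CMSIS-NN target:
  // resolver.AddUnidirectionalSequenceLSTM(
  //     tflite::Register_UNIDIRECTIONAL_SEQUENCE_LSTM_INT8());
}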
#include "tensorflow/lite/micro/micro_log.h" namespace tflite { -namespace ops { -namespace micro { -namespace unpack { + namespace { constexpr int kInputTensor = 0; @@ -100,13 +98,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } + } // namespace -} // namespace unpack TfLiteRegistration Register_UNPACK() { - return tflite::micro::RegisterOp(nullptr, nullptr, unpack::Eval); + return tflite::micro::RegisterOp(nullptr, nullptr, Eval); } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/micro_allocation_info.cc b/third_party/tflite-micro/tensorflow/lite/micro/micro_allocation_info.cc index 96484330e..0160cb143 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/micro_allocation_info.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_allocation_info.cc @@ -1,4 +1,4 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -204,6 +204,14 @@ TfLiteStatus AllocationInfoBuilder::InitializeAllocationInfo( (current->bytes != 0); if (offline_offsets) { current->offline_offset = offline_offsets[i]; + + // Mark offline planned variable tensors so they can get an offline + // offset and be handled offline. + if (subgraph->tensors()->Get(i)->is_variable() && + current->offline_offset != kOnlinePlannedBuffer) { + current->needs_allocating = true; + } + } else { current->offline_offset = kOnlinePlannedBuffer; } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/micro_allocator.cc b/third_party/tflite-micro/tensorflow/lite/micro/micro_allocator.cc index 3853df307..f78b53a28 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/micro_allocator.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_allocator.cc @@ -1,4 +1,4 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -490,15 +490,6 @@ TfLiteStatus MicroAllocator::FinishModelAllocation( TF_LITE_ENSURE_STATUS(AllocateScratchBufferHandles( scratch_buffer_handles, scratch_buffer_request_count_)); - // Allocate buffers for variable tensors. - for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size(); - subgraph_idx++) { - const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx); - TFLITE_DCHECK(subgraph != nullptr); - TF_LITE_ENSURE_STATUS(AllocateVariables( - subgraph, subgraph_allocations[subgraph_idx].tensors)); - } - // Plan all subgraphs and scratch buffers together. 
TF_LITE_ENSURE_STATUS(CommitStaticMemoryPlan(model, subgraph_allocations, *scratch_buffer_handles)); @@ -754,23 +745,27 @@ TfLiteStatus MicroAllocator::AllocateTfLiteEvalTensors( return kTfLiteOk; } -TfLiteStatus MicroAllocator::AllocateVariables(const SubGraph* subgraph, - TfLiteEvalTensor* eval_tensors) { +TfLiteStatus MicroAllocator::AllocateVariables( + const SubGraph* subgraph, TfLiteEvalTensor* eval_tensors, + const int32_t* offline_planner_offsets) { for (size_t i = 0; i < subgraph->tensors()->size(); ++i) { auto* tensor = subgraph->tensors()->Get(i); if (tensor->is_variable()) { - size_t buffer_size; - TF_LITE_ENSURE_STATUS( - TfLiteEvalTensorByteLength(&eval_tensors[i], &buffer_size)); + if (offline_planner_offsets == nullptr || + offline_planner_offsets[i] == kOnlinePlannedBuffer) { + size_t buffer_size; + TF_LITE_ENSURE_STATUS( + TfLiteEvalTensorByteLength(&eval_tensors[i], &buffer_size)); - eval_tensors[i].data.data = - persistent_buffer_allocator_->AllocatePersistentBuffer( - buffer_size, MicroArenaBufferAlignment()); + eval_tensors[i].data.data = + persistent_buffer_allocator_->AllocatePersistentBuffer( + buffer_size, MicroArenaBufferAlignment()); - if (eval_tensors[i].data.data == nullptr) { - MicroPrintf("Failed to allocate variable tensor of size %d", - buffer_size); - return kTfLiteError; + if (eval_tensors[i].data.data == nullptr) { + MicroPrintf("Failed to allocate variable tensor of size %d", + buffer_size); + return kTfLiteError; + } } } } @@ -819,6 +814,17 @@ TfLiteStatus MicroAllocator::CommitStaticMemoryPlan( const int32_t* offline_planner_offsets = nullptr; TF_LITE_ENSURE_STATUS( builder.GetOfflinePlannedOffsets(&offline_planner_offsets)); + + // We allocate buffers for variable tensors here since the offline planner + // offsets are conviently available here. + for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size(); + subgraph_idx++) { + const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx); + TFLITE_DCHECK(subgraph != nullptr); + TF_LITE_ENSURE_STATUS(AllocateVariables( + subgraph, allocations[subgraph_idx].tensors, offline_planner_offsets)); + } + TF_LITE_ENSURE_STATUS( builder.InitializeAllocationInfo(offline_planner_offsets, allocations)); diff --git a/third_party/tflite-micro/tensorflow/lite/micro/micro_allocator.h b/third_party/tflite-micro/tensorflow/lite/micro/micro_allocator.h index 5cd0e1893..c68c71359 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/micro_allocator.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_allocator.h @@ -1,4 +1,4 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -247,9 +247,13 @@ class MicroAllocator { // for all tensor buffers. virtual TfLiteStatus AllocateTfLiteEvalTensors( const Model* model, SubgraphAllocations* subgraph_allocations); + // Allocates persistent tensor buffers for variable tensors in the subgraph. - virtual TfLiteStatus AllocateVariables(const SubGraph* subgraph, - TfLiteEvalTensor* eval_tensors); + // Online and offline variable tensors are handled differently hence the + // offline_planner_offsets parameter is needed. + virtual TfLiteStatus AllocateVariables( + const SubGraph* subgraph, TfLiteEvalTensor* eval_tensors, + const int32_t* offline_planner_offsets); // Allocate and return a persistent TfLiteTensor. 
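The allocator changes above make variable-tensor placement conditional: AllocateVariables only hands a variable tensor its own persistent buffer when the offline planner did not supply an offset for it, while InitializeAllocationInfo marks offline-planned variables as needs_allocating so the memory plan places them in the arena. The decision reduces to a small predicate; kOnlinePlannedBuffer's concrete value is assumed here for illustration:

#include <cstddef>
#include <cstdint>

// Assumed sentinel: tensors without an offline offset are planned online.
constexpr int32_t kOnlinePlannedBufferSketch = -1;

// True when the variable tensor should get a persistent buffer, false when an
// offline-planned offset already places it in the arena.
bool NeedsPersistentVariableBuffer(bool is_variable,
                                   const int32_t* offline_planner_offsets,
                                   size_t tensor_index) {
  if (!is_variable) return false;
  if (offline_planner_offsets == nullptr) return true;
  return offline_planner_offsets[tensor_index] == kOnlinePlannedBufferSketch;
}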
// TODO(b/162311891): Drop this method when the interpreter has an API for diff --git a/third_party/tflite-micro/tensorflow/lite/micro/micro_mutable_op_resolver.h b/third_party/tflite-micro/tensorflow/lite/micro/micro_mutable_op_resolver.h index a4d50c83a..b1e46cf92 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/micro_mutable_op_resolver.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_mutable_op_resolver.h @@ -1,4 +1,4 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -168,8 +168,7 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddCeil() { - return AddBuiltin(BuiltinOperator_CEIL, tflite::ops::micro::Register_CEIL(), - ParseCeil); + return AddBuiltin(BuiltinOperator_CEIL, Register_CEIL(), ParseCeil); } TfLiteStatus AddCircularBuffer() { @@ -177,8 +176,7 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddConcatenation() { - return AddBuiltin(BuiltinOperator_CONCATENATION, - tflite::ops::micro::Register_CONCATENATION(), + return AddBuiltin(BuiltinOperator_CONCATENATION, Register_CONCATENATION(), ParseConcatenation); } @@ -227,8 +225,7 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddEqual() { - return AddBuiltin(BuiltinOperator_EQUAL, - tflite::ops::micro::Register_EQUAL(), ParseEqual); + return AddBuiltin(BuiltinOperator_EQUAL, Register_EQUAL(), ParseEqual); } TfLiteStatus AddEthosU() { @@ -253,8 +250,7 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddFloor() { - return AddBuiltin(BuiltinOperator_FLOOR, - tflite::ops::micro::Register_FLOOR(), ParseFloor); + return AddBuiltin(BuiltinOperator_FLOOR, Register_FLOOR(), ParseFloor); } TfLiteStatus AddFloorDiv() { @@ -284,13 +280,12 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddGreater() { - return AddBuiltin(BuiltinOperator_GREATER, - tflite::ops::micro::Register_GREATER(), ParseGreater); + return AddBuiltin(BuiltinOperator_GREATER, Register_GREATER(), + ParseGreater); } TfLiteStatus AddGreaterEqual() { - return AddBuiltin(BuiltinOperator_GREATER_EQUAL, - tflite::ops::micro::Register_GREATER_EQUAL(), + return AddBuiltin(BuiltinOperator_GREATER_EQUAL, Register_GREATER_EQUAL(), ParseGreaterEqual); } @@ -305,8 +300,7 @@ class MicroMutableOpResolver : public MicroOpResolver { TfLiteStatus AddL2Normalization() { return AddBuiltin(BuiltinOperator_L2_NORMALIZATION, - tflite::ops::micro::Register_L2_NORMALIZATION(), - ParseL2Normalization); + Register_L2_NORMALIZATION(), ParseL2Normalization); } TfLiteStatus AddL2Pool2D() { @@ -320,13 +314,11 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddLess() { - return AddBuiltin(BuiltinOperator_LESS, tflite::ops::micro::Register_LESS(), - ParseLess); + return AddBuiltin(BuiltinOperator_LESS, Register_LESS(), ParseLess); } TfLiteStatus AddLessEqual() { - return AddBuiltin(BuiltinOperator_LESS_EQUAL, - tflite::ops::micro::Register_LESS_EQUAL(), + return AddBuiltin(BuiltinOperator_LESS_EQUAL, Register_LESS_EQUAL(), ParseLessEqual); } @@ -362,8 +354,8 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddMaximum() { - return AddBuiltin(BuiltinOperator_MAXIMUM, - tflite::ops::micro::Register_MAXIMUM(), ParseMaximum); + return AddBuiltin(BuiltinOperator_MAXIMUM, Register_MAXIMUM(), + 
ParseMaximum); } TfLiteStatus AddMaxPool2D( @@ -381,8 +373,8 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddMinimum() { - return AddBuiltin(BuiltinOperator_MINIMUM, - tflite::ops::micro::Register_MINIMUM(), ParseMinimum); + return AddBuiltin(BuiltinOperator_MINIMUM, Register_MINIMUM(), + ParseMinimum); } TfLiteStatus AddMul(const TfLiteRegistration& registration = Register_MUL()) { @@ -394,13 +386,12 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddNotEqual() { - return AddBuiltin(BuiltinOperator_NOT_EQUAL, - tflite::ops::micro::Register_NOT_EQUAL(), ParseNotEqual); + return AddBuiltin(BuiltinOperator_NOT_EQUAL, Register_NOT_EQUAL(), + ParseNotEqual); } TfLiteStatus AddPack() { - return AddBuiltin(BuiltinOperator_PACK, tflite::ops::micro::Register_PACK(), - ParsePack); + return AddBuiltin(BuiltinOperator_PACK, Register_PACK(), ParsePack); } TfLiteStatus AddPad(const TfLiteRegistration& registration = Register_PAD()) { @@ -452,7 +443,7 @@ class MicroMutableOpResolver : public MicroOpResolver { TfLiteStatus AddResizeNearestNeighbor() { return AddBuiltin(BuiltinOperator_RESIZE_NEAREST_NEIGHBOR, - tflite::ops::micro::Register_RESIZE_NEAREST_NEIGHBOR(), + Register_RESIZE_NEAREST_NEIGHBOR(), ParseResizeNearestNeighbor); } @@ -500,13 +491,11 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddSplit() { - return AddBuiltin(BuiltinOperator_SPLIT, - tflite::ops::micro::Register_SPLIT(), ParseSplit); + return AddBuiltin(BuiltinOperator_SPLIT, Register_SPLIT(), ParseSplit); } TfLiteStatus AddSplitV() { - return AddBuiltin(BuiltinOperator_SPLIT_V, - tflite::ops::micro::Register_SPLIT_V(), ParseSplitV); + return AddBuiltin(BuiltinOperator_SPLIT_V, Register_SPLIT_V(), ParseSplitV); } TfLiteStatus AddSqueeze() { @@ -531,8 +520,7 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddStridedSlice() { - return AddBuiltin(BuiltinOperator_STRIDED_SLICE, - tflite::ops::micro::Register_STRIDED_SLICE(), + return AddBuiltin(BuiltinOperator_STRIDED_SLICE, Register_STRIDED_SLICE(), ParseStridedSlice); } @@ -550,8 +538,7 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddTanh() { - return AddBuiltin(BuiltinOperator_TANH, tflite::ops::micro::Register_TANH(), - ParseTanh); + return AddBuiltin(BuiltinOperator_TANH, Register_TANH(), ParseTanh); } TfLiteStatus AddTransposeConv() { @@ -565,14 +552,14 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddUnpack() { - return AddBuiltin(BuiltinOperator_UNPACK, - tflite::ops::micro::Register_UNPACK(), ParseUnpack); + return AddBuiltin(BuiltinOperator_UNPACK, Register_UNPACK(), ParseUnpack); } - TfLiteStatus AddUnidirectionalSequenceLSTM() { + TfLiteStatus AddUnidirectionalSequenceLSTM( + const TfLiteRegistration& registration = + Register_UNIDIRECTIONAL_SEQUENCE_LSTM()) { return AddBuiltin(BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM, - Register_UNIDIRECTIONAL_SEQUENCE_LSTM(), - ParseUnidirectionalSequenceLSTM); + registration, ParseUnidirectionalSequenceLSTM); } TfLiteStatus AddVarHandle() { diff --git a/third_party/tflite-micro/tensorflow/lite/micro/micro_profiler.cc b/third_party/tflite-micro/tensorflow/lite/micro/micro_profiler.cc index 9f6fc74c9..e9eb5e549 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/micro_profiler.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_profiler.cc @@ -52,7 +52,7 @@ void MicroProfiler::Log() const { #if !defined(TF_LITE_STRIP_ERROR_STRINGS) for (int i = 0; i < 
num_events_; ++i) { uint32_t ticks = end_ticks_[i] - start_ticks_[i]; - MicroPrintf("%s took %u ticks (%d ms).", tags_[i], ticks, + MicroPrintf("%s took %" PRIu32 " ticks (%d ms).", tags_[i], ticks, TicksToMs(ticks)); } #endif @@ -63,7 +63,7 @@ void MicroProfiler::LogCsv() const { MicroPrintf("\"Event\",\"Tag\",\"Ticks\""); for (int i = 0; i < num_events_; ++i) { uint32_t ticks = end_ticks_[i] - start_ticks_[i]; - MicroPrintf("%d,%s,%u", i, tags_[i], ticks); + MicroPrintf("%d,%s,%" PRIu32, i, tags_[i], ticks); } #endif } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/micro_resource_variable.cc b/third_party/tflite-micro/tensorflow/lite/micro/micro_resource_variable.cc index e6cea845b..767e7d17d 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/micro_resource_variable.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_resource_variable.cc @@ -63,6 +63,7 @@ int MicroResourceVariables::CreateIdIfNoneFound(const char* container, resource_variables_[resource_id].shared_name = shared_name; resource_variables_[resource_id].resource_buffer = nullptr; resource_variables_[resource_id].bytes = 0; + resource_variables_[resource_id].default_value = 0; return resource_id; } @@ -96,9 +97,17 @@ TfLiteStatus MicroResourceVariables::Allocate(int id, TfLiteContext* context, MicroPrintf("Failed to allocate resource buffer."); return kTfLiteError; } - // Zero out resource buffers by deafult. Buffers can be initialized to - // nonzero values using ASSIGN_VARIABLE. - memset(variable.resource_buffer, 0, variable.bytes); + // Set resource buffers to the zero_point by default. Buffers can be + // initialized to nonzero values using ASSIGN_VARIABLE. + // See comment#2 in b/269648474 for more details why we use zero_point. + if (tensor->quantization.params != nullptr) { + auto* quantization_data = reinterpret_cast( + tensor->quantization.params); + int8_t zero_point = quantization_data->zero_point[0].data[0]; + variable.default_value = zero_point; + } + // TODO(b/269669735): Explains why casting zero_point to int8 and memset. + memset(variable.resource_buffer, variable.default_value, variable.bytes); } return kTfLiteOk; @@ -127,7 +136,8 @@ TfLiteStatus MicroResourceVariables::Assign(int id, TfLiteStatus MicroResourceVariables::ResetAll() { for (int i = 0; i < num_resource_variables_; i++) { MicroResourceVariable variable = resource_variables_[i]; - memset(variable.resource_buffer, 0, variable.bytes); + // TODO(b/269669735): Explains why casting zero_point to int8 and memset. + memset(variable.resource_buffer, variable.default_value, variable.bytes); } return kTfLiteOk; } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/micro_resource_variable.h b/third_party/tflite-micro/tensorflow/lite/micro/micro_resource_variable.h index e8df991c3..fb9917d47 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/micro_resource_variable.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_resource_variable.h @@ -69,6 +69,8 @@ class MicroResourceVariables { // This is only for verifying read size. 
size_t bytes; + // Initialization default value + int8_t default_value; }; MicroResourceVariables(MicroResourceVariable* variables, diff --git a/third_party/tflite-micro/tensorflow/lite/micro/recording_micro_allocator.cc b/third_party/tflite-micro/tensorflow/lite/micro/recording_micro_allocator.cc index f646d61a2..f41dba61d 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/recording_micro_allocator.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/recording_micro_allocator.cc @@ -1,4 +1,4 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -192,11 +192,12 @@ TfLiteStatus RecordingMicroAllocator::AllocateTfLiteEvalTensors( } TfLiteStatus RecordingMicroAllocator::AllocateVariables( - const SubGraph* subgraph, TfLiteEvalTensor* eval_tensors) { + const SubGraph* subgraph, TfLiteEvalTensor* eval_tensors, + const int32_t* offline_planner_offsets) { RecordedAllocation allocations = SnapshotAllocationUsage(); - TfLiteStatus status = - MicroAllocator::AllocateVariables(subgraph, eval_tensors); + TfLiteStatus status = MicroAllocator::AllocateVariables( + subgraph, eval_tensors, offline_planner_offsets); RecordAllocationUsage(allocations, recorded_tflite_tensor_variable_buffer_data_); diff --git a/third_party/tflite-micro/tensorflow/lite/micro/recording_micro_allocator.h b/third_party/tflite-micro/tensorflow/lite/micro/recording_micro_allocator.h index 3136fadea..b6f69264d 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/recording_micro_allocator.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/recording_micro_allocator.h @@ -1,4 +1,4 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -77,8 +77,9 @@ class RecordingMicroAllocator : public MicroAllocator { const Model* model, SubgraphAllocations* subgraph_allocations) override; TfLiteStatus AllocateTfLiteEvalTensors( const Model* model, SubgraphAllocations* subgraph_allocations) override; - TfLiteStatus AllocateVariables(const SubGraph* subgraph, - TfLiteEvalTensor* eval_tensors) override; + TfLiteStatus AllocateVariables( + const SubGraph* subgraph, TfLiteEvalTensor* eval_tensors, + const int32_t* offline_planner_offsets) override; // TODO(b/162311891): Once all kernels have been updated to the new API drop // this method. It is only used to record TfLiteTensor persistent allocations. 
TfLiteTensor* AllocatePersistentTfLiteTensorInternal() override; diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/BUILD b/third_party/tflite-micro/tensorflow/lite/micro/tools/BUILD index c0046847f..10ea6f06a 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/BUILD +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/BUILD @@ -1,3 +1,5 @@ +load("@tflm_pip_deps//:requirements.bzl", "requirement") + package( default_visibility = ["//:__subpackages__"], licenses = ["notice"], @@ -22,6 +24,38 @@ py_binary( srcs = ["generate_cc_arrays.py"], ) +py_binary( + name = "requantize_flatbuffer", + srcs = [ + "requantize_flatbuffer.py", + "requantize_flatbuffer_utils.py", + ], + srcs_version = "PY3", + deps = [ + "//tensorflow/lite/python:schema_py", + "//tensorflow/lite/tools:flatbuffer_utils", + "@absl_py//absl:app", + ], +) + +py_test( + name = "requantize_flatbuffer_test", + srcs = ["requantize_flatbuffer_test.py"], + main = "requantize_flatbuffer_test.py", + python_version = "PY3", + tags = [ + "noasan", + "nomsan", # Python doesn't like these symbols from interpreter_wrapper_pybind.so + "noubsan", + ], + deps = [ + ":requantize_flatbuffer", + "//tensorflow/lite/micro/python/interpreter/src:tflm_runtime", + requirement("numpy"), + requirement("tensorflow-cpu"), + ], +) + cc_binary( name = "tflite_flatbuffer_align", srcs = [ diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bazel.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bazel.sh index 89d2bdece..b76ba6e26 100755 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bazel.sh +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bazel.sh @@ -26,28 +26,12 @@ source tensorflow/lite/micro/tools/ci_build/helper_functions.sh # covers non-test binary targets as well. These were previousbly covered by # having build_test but that was removed with #194. -CC=clang readable_run bazel build tensorflow/lite/micro/... \ +CC=clang readable_run bazel build ... \ --build_tag_filters=-no_oss -CC=clang readable_run bazel test tensorflow/lite/micro/... \ +CC=clang readable_run bazel test ... \ --test_tag_filters=-no_oss --build_tag_filters=-no_oss \ --test_output=errors -CC=clang readable_run bazel build tensorflow/lite/micro/... \ - --config=msan --build_tag_filters=-no_oss,-nomsan -CC=clang readable_run bazel test tensorflow/lite/micro/... \ - --config=msan \ - --test_tag_filters=-no_oss,-nomsan --build_tag_filters=-no_oss,-nomsan \ - --test_output=errors - -CC=clang readable_run bazel build tensorflow/lite/micro/... \ - --config=asan --build_tag_filters=-no_oss,-noasan -CC=clang readable_run bazel test tensorflow/lite/micro/... \ - --config=asan \ - --test_tag_filters=-no_oss,-noasan --build_tag_filters=-no_oss,-noasan \ - --test_output=errors - # TODO(b/178621680): enable ubsan once bazel + clang + ubsan errors are fixed. #CC=clang readable_run bazel test tensorflow/lite/micro/... --config=ubsan --test_tag_filters=-no_oss,-noubsan --build_tag_filters=-no_oss,-noubsan -readable_run bazel test tensorflow/lite/tools/... 
\ - --test_output=errors diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bazel_asan.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bazel_asan.sh new file mode 100755 index 000000000..9e025f523 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bazel_asan.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ROOT_DIR=${SCRIPT_DIR}/../../../../.. +cd "${ROOT_DIR}" + +source tensorflow/lite/micro/tools/ci_build/helper_functions.sh + +# We are using a bazel build followed by bazel test to make sure that the CI +# covers non-test binary targets as well. These were previousbly covered by +# having build_test but that was removed with #194. + +CC=clang readable_run bazel build tensorflow/lite/micro/... \ + --config=asan --build_tag_filters=-no_oss,-noasan +CC=clang readable_run bazel test tensorflow/lite/micro/... \ + --config=asan \ + --test_tag_filters=-no_oss,-noasan --build_tag_filters=-no_oss,-noasan \ + --test_output=errors diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bazel_msan.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bazel_msan.sh new file mode 100755 index 000000000..a0b355a08 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bazel_msan.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ROOT_DIR=${SCRIPT_DIR}/../../../../.. +cd "${ROOT_DIR}" + +source tensorflow/lite/micro/tools/ci_build/helper_functions.sh + +# We are using a bazel build followed by bazel test to make sure that the CI +# covers non-test binary targets as well. These were previousbly covered by +# having build_test but that was removed with #194. + +CC=clang readable_run bazel build tensorflow/lite/micro/... \ + --config=msan --build_tag_filters=-no_oss,-nomsan +CC=clang readable_run bazel test tensorflow/lite/micro/... 
\ + --config=msan \ + --test_tag_filters=-no_oss,-nomsan --build_tag_filters=-no_oss,-nomsan \ + --test_output=errors diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bazel_tflite_tools.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bazel_tflite_tools.sh new file mode 100755 index 000000000..9556cffd8 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bazel_tflite_tools.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ROOT_DIR=${SCRIPT_DIR}/../../../../.. +cd "${ROOT_DIR}" + +source tensorflow/lite/micro/tools/ci_build/helper_functions.sh + +readable_run bazel test tensorflow/lite/tools/... \ + --test_output=errors diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bluepill_no_release.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bluepill_no_release.sh new file mode 100755 index 000000000..dad61b179 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bluepill_no_release.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# Called with following arguments: +# 1 - (optional) TENSORFLOW_ROOT: path to root of the TFLM tree (relative to directory from where the script is called). +# 2 - (optional) EXTERNAL_DIR: Path to the external directory that contains external code +# Tests the microcontroller code for bluepill platform + +set -e +pwd + +TENSORFLOW_ROOT=${1} +EXTERNAL_DIR=${2} + +source ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/ci_build/helper_functions.sh + +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +TARGET=bluepill + +# TODO(b/143715361): downloading first to allow for parallel builds. +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile TARGET=${TARGET} third_party_downloads TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# Next, build w/o release so that we can run the tests and get additional +# debugging info on failures. 
+readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} +readable_run make -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile TARGET=${TARGET} build TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} +readable_run make -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile TARGET=${TARGET} test TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bluepill_release.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bluepill_release.sh new file mode 100755 index 000000000..a20be238b --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bluepill_release.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# Called with following arguments: +# 1 - (optional) TENSORFLOW_ROOT: path to root of the TFLM tree (relative to directory from where the script is called). +# 2 - (optional) EXTERNAL_DIR: Path to the external directory that contains external code +# Tests the microcontroller code for bluepill platform + +set -e +pwd + +TENSORFLOW_ROOT=${1} +EXTERNAL_DIR=${2} + +source ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/ci_build/helper_functions.sh + +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +TARGET=bluepill + +# TODO(b/143715361): downloading first to allow for parallel builds. +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile TARGET=${TARGET} third_party_downloads TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# check that the release build is ok. +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} +readable_run make -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile TARGET=${TARGET} BUILD_TYPE=release build TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bluepill_renode.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bluepill_renode.sh new file mode 100755 index 000000000..ec7a68f4e --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_bluepill_renode.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# Called with following arguments: +# 1 - (optional) TENSORFLOW_ROOT: path to root of the TFLM tree (relative to directory from where the script is called). +# 2 - (optional) EXTERNAL_DIR: Path to the external directory that contains external code +# Tests the microcontroller code for bluepill platform + +set -e +pwd + +TENSORFLOW_ROOT=${1} +EXTERNAL_DIR=${2} + +source ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/ci_build/helper_functions.sh + +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +TARGET=bluepill + +# TODO(b/143715361): downloading first to allow for parallel builds. +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile TARGET=${TARGET} third_party_downloads TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# We use Renode differently when running the full test suite (make test) vs an +# individual test. So, we test only of the kernels individually as well to have +# both of the Renode variations be part of the CI. +readable_run make -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile TARGET=${TARGET} test_kernel_add_test TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_code_style.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_code_style.sh index 4c1ab4efa..81ca2c65e 100755 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_code_style.sh +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_code_style.sh @@ -45,6 +45,7 @@ tensorflow/lite/micro/tools/make/downloads/pigweed/pw_presubmit/py/pw_presubmit/ -e kernels/internal/reference/integer_ops/ \ -e kernels/internal/reference/reference_ops.h \ -e python/schema_py_generated.py \ + -e python_requirements.in \ -e tools/make/downloads \ -e tools/make/targets/ecm3531 \ -e BUILD\ diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_stm32f4_no_release.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_stm32f4_no_release.sh new file mode 100755 index 000000000..39bc146f6 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_stm32f4_no_release.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +# Called with following arguments: +# 1 - (optional) TENSORFLOW_ROOT: path to root of the TFLM tree (relative to directory from where the script is called). +# 2 - (optional) EXTERNAL_DIR: Path to the external directory that contains external code +# Tests the microcontroller code for stm32f4 + +set -e + +TARGET=stm32f4 +OPTIMIZED_KERNEL_DIR=cmsis_nn +TENSORFLOW_ROOT=${1} +EXTERNAL_DIR=${2} + +source ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/ci_build/helper_functions.sh + +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# TODO(b/143715361): downloading first to allow for parallel builds. +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile OPTIMIZED_KERNEL_DIR=${OPTIMIZED_KERNEL_DIR} TARGET=${TARGET} third_party_downloads TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# Build w/o release so that we can run the tests and get additional +# debugging info on failures. +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} +readable_run make -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile OPTIMIZED_KERNEL_DIR=${OPTIMIZED_KERNEL_DIR} TARGET=${TARGET} build TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} +readable_run make -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile OPTIMIZED_KERNEL_DIR=${OPTIMIZED_KERNEL_DIR} TARGET=${TARGET} test TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_stm32f4_release.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_stm32f4_release.sh new file mode 100755 index 000000000..8c1000948 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_stm32f4_release.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# Called with following arguments: +# 1 - (optional) TENSORFLOW_ROOT: path to root of the TFLM tree (relative to directory from where the script is called). +# 2 - (optional) EXTERNAL_DIR: Path to the external directory that contains external code +# Tests the microcontroller code for stm32f4 + +set -e + +TARGET=stm32f4 +OPTIMIZED_KERNEL_DIR=cmsis_nn +TENSORFLOW_ROOT=${1} +EXTERNAL_DIR=${2} + +source ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/ci_build/helper_functions.sh + +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# TODO(b/143715361): downloading first to allow for parallel builds. 
+readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile OPTIMIZED_KERNEL_DIR=${OPTIMIZED_KERNEL_DIR} TARGET=${TARGET} third_party_downloads TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# Make sure that the release build succeeds. +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} +readable_run make -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile BUILD_TYPE=release OPTIMIZED_KERNEL_DIR=${OPTIMIZED_KERNEL_DIR} TARGET=${TARGET} build TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_x86_default.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_x86_default.sh new file mode 100755 index 000000000..623238ed0 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_x86_default.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# +# Called with following arguments: +# 1 - (optional) TENSORFLOW_ROOT: path to root of the TFLM tree (relative to directory from where the script is called). +# 2 - (optional) EXTERNAL_DIR: Path to the external directory that contains external code +# Tests the microcontroller code using native x86 execution. +# +# This file is a subset of the tests in test_x86.sh. It is for parallelizing the test +# suite on github actions. + +set -e + +TENSORFLOW_ROOT=${1} +EXTERNAL_DIR=${2} + +source ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/ci_build/helper_functions.sh + +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# TODO(b/143715361): downloading first to allow for parallel builds. +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile third_party_downloads TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# Build w/o release so that we can run the tests and get additional +# debugging info on failures. 
+readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} +readable_run make -s -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile build TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} +readable_run make -s -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile test TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} +readable_run make -s -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile integration_tests TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_x86_no_tflite_static_memory.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_x86_no_tflite_static_memory.sh new file mode 100755 index 000000000..9d63a2608 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_x86_no_tflite_static_memory.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# +# Called with following arguments: +# 1 - (optional) TENSORFLOW_ROOT: path to root of the TFLM tree (relative to directory from where the script is called). +# 2 - (optional) EXTERNAL_DIR: Path to the external directory that contains external code +# Tests the microcontroller code using native x86 execution. +# +# This file is a subset of the tests in test_x86.sh. It is for parallelizing the test +# suite on github actions. + +set -e + +TENSORFLOW_ROOT=${1} +EXTERNAL_DIR=${2} + +source ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/ci_build/helper_functions.sh + +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# TODO(b/143715361): downloading first to allow for parallel builds. +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile third_party_downloads TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# Build w/o TF_LITE_STATIC_MEMORY to catch additional errors. +# TODO(b/160955687): We run the tests w/o TF_LITE_STATIC_MEMORY to make the +# internal and open source CI consistent. See b/160955687#comment7 for more +# details. 
+readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} +readable_run make -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile BUILD_TYPE=no_tf_lite_static_memory test TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_x86_out_of_tree.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_x86_out_of_tree.sh new file mode 100755 index 000000000..6a0213610 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_x86_out_of_tree.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# +# Called with following arguments: +# 1 - (optional) TENSORFLOW_ROOT: path to root of the TFLM tree (relative to directory from where the script is called). +# 2 - (optional) EXTERNAL_DIR: Path to the external directory that contains external code +# Tests the microcontroller code using native x86 execution. +# +# This file is a subset of the tests in test_x86.sh. It is for parallelizing the test +# suite on github actions. + +set -e + +TENSORFLOW_ROOT=${1} +EXTERNAL_DIR=${2} + +source ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/ci_build/helper_functions.sh + +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# TODO(b/143715361): downloading first to allow for parallel builds. +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile third_party_downloads TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# Test the hello_world as an example outside of the github repo. 
+readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} +cp -r ${TENSORFLOW_ROOT}tensorflow/lite/micro/examples/hello_world ./ +sed -i 's/tensorflow\/lite\/micro\/examples\///g' hello_world/Makefile.inc +sed -i 's/$(TENSORFLOW_ROOT)//g' hello_world/Makefile.inc +mv hello_world/Makefile.inc hello_world/Makefile_internal.inc +sed -i 's/tensorflow\/lite\/micro\/examples\///g' hello_world/evaluate_test.cc +readable_run make -s -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile test_evaluate_cc_test TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=hello_world/ +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=hello_world/ +rm -rf hello_world \ No newline at end of file diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_x86_release.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_x86_release.sh new file mode 100755 index 000000000..ec96f99c9 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_x86_release.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# +# Called with following arguments: +# 1 - (optional) TENSORFLOW_ROOT: path to root of the TFLM tree (relative to directory from where the script is called). +# 2 - (optional) EXTERNAL_DIR: Path to the external directory that contains external code +# Tests the microcontroller code using native x86 execution. +# +# This file is a subset of the tests in test_x86.sh. It is for parallelizing the test +# suite on github actions. + +set -e + +TENSORFLOW_ROOT=${1} +EXTERNAL_DIR=${2} + +source ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/ci_build/helper_functions.sh + +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# TODO(b/143715361): downloading first to allow for parallel builds. +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile third_party_downloads TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# Build with release and logs so that we can run the tests and get +# additional debugging info on failures. 
+readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} +readable_run make -s -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile BUILD_TYPE=release_with_logs build TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} +readable_run make -s -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile BUILD_TYPE=release_with_logs test TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} +readable_run make -s -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile BUILD_TYPE=release_with_logs integration_tests TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# Next, make sure that the release build succeeds. +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} +readable_run make -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile BUILD_TYPE=release build TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} \ No newline at end of file diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifimini.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifimini.sh new file mode 100755 index 000000000..abfe651c6 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifimini.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# Called with following arguments: +# 1 - EXTERNAL or INTERNAL to signal how to run the script +# 2 - (optional) TENSORFLOW_ROOT: path to root of the TFLM tree (relative to directory from where the script is called). +# 3 - (optional) EXTERNAL_DIR: Path to the external directory that contains external code + +set -e +pwd + +TENSORFLOW_ROOT=${1} +EXTERNAL_DIR=${2} + +source ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/ci_build/helper_functions.sh + +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# TODO(b/143904317): downloading first to allow for parallel builds. 
+readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile third_party_downloads TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile \ + TARGET=xtensa \ + TARGET_ARCH=hifimini \ + OPTIMIZED_KERNEL_DIR=xtensa \ + XTENSA_CORE=mini1m1m_RG \ + TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \ + EXTERNAL_DIR=${EXTERNAL_DIR} \ + build -j$(nproc) + +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile \ + TARGET=xtensa \ + TARGET_ARCH=hifimini \ + OPTIMIZED_KERNEL_DIR=xtensa \ + XTENSA_CORE=mini1m1m_RG \ + TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \ + EXTERNAL_DIR=${EXTERNAL_DIR} \ + test -j$(nproc) \ No newline at end of file diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver/templates/micro_mutable_op_resolver_test.cc.mako b/third_party/tflite-micro/tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver/templates/micro_mutable_op_resolver_test.cc.mako index 68176c566..3de4ef406 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver/templates/micro_mutable_op_resolver_test.cc.mako +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver/templates/micro_mutable_op_resolver_test.cc.mako @@ -75,7 +75,7 @@ void RunModel(const uint8_t* model, TfLiteTensor* output_tensor = interpreter.output(0); TF_LITE_MICRO_EXPECT_EQ(output_tensor->bytes, golden_size * sizeof(int8_t)); - int8_t* output = GetTensorData(output_tensor); + int8_t* output = ::tflite::GetTensorData(output_tensor); for (uint32_t i = 0; i < golden_size; i++) { // TODO(b/205046520): Better understand why TfLite and TFLM can sometimes be // off by 1. diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/Makefile b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/Makefile index 25c234d75..61d0e7fd6 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/Makefile +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/Makefile @@ -1,4 +1,4 @@ -# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -267,8 +267,8 @@ endif # Kernel integration tests must be excluded on certain targets. 
MICRO_LITE_INTEGRATION_TESTS += $(shell find $(TENSORFLOW_ROOT)tensorflow/lite/micro/integration_tests -name Makefile.inc) -MICRO_LITE_GEN_MUTABLE_OP_RESOLVER_TEST += $(shell find \ -$(TENSORFLOW_ROOT)tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver_test/person_detect -name Makefile.inc) +MICRO_LITE_GEN_MUTABLE_OP_RESOLVER_TEST += \ + $(wildcard $(TENSORFLOW_ROOT)tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver_test/person_detect/Makefile.inc) MICRO_LITE_BENCHMARKS := $(wildcard $(TENSORFLOW_ROOT)tensorflow/lite/micro/benchmarks/Makefile.inc) @@ -357,6 +357,7 @@ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/logistic.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/logistic_common.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/log_softmax.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/lstm_eval.cc \ +$(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/lstm_eval_common.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/maximum_minimum.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/micro_tensor_utils.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/mirror_pad.cc \ diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/cmsis_nn_download.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/cmsis_nn_download.sh index 35fa6c7c8..9a5e4b703 100755 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/cmsis_nn_download.sh +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/cmsis_nn_download.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -47,9 +47,9 @@ if [ -d ${DOWNLOADED_CMSIS_NN_PATH} ]; then echo >&2 "${DOWNLOADED_CMSIS_NN_PATH} already exists, skipping the download." else - ZIP_PREFIX_NN="e98ee09a03dd12d4b3eac6f7efa25d3ad62a24b9" + ZIP_PREFIX_NN="d071e9f70195559e7242709b8df3adeb7c50d0fb" CMSIS_NN_URL="http://github.com/ARM-software/CMSIS-NN/archive/${ZIP_PREFIX_NN}.zip" - CMSIS_NN_MD5="a0e4b5f2c5c62405c304c7ffcc64af3b" + CMSIS_NN_MD5="0364a1a83f86a5104b893a4d21df7874" # wget is much faster than git clone of the entire repo. So we wget a specific # version and can then apply a patch, as needed. 
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/xtensa.inc b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/xtensa.inc index 20ee1e4e0..3b282676a 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/xtensa.inc +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/xtensa.inc @@ -8,15 +8,24 @@ MICROLITE_CC_KERNEL_SRCS += \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/conv_vision.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/depthwise_conv_hifi.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/depthwise_conv_vision.cc \ + $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/fully_connected_common_xtensa.cc \ + $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/fully_connected_int8.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/fully_connected_vision.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/pad_vision.cc \ + $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/pooling_int8.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/pooling_vision.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/reduce_vision.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/reshape_vision.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/softmax_int8_int16.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/softmax_vision.cc -ifeq ($(TARGET_ARCH), hifi5) +ifeq ($(TARGET_ARCH), hifimini) + # hifimini optimizations are implemented in the TFLM repository itself. + THIRD_PARTY_KERNEL_CC_SRCS += \ + $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/hifimini/svdf.cc \ + $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/hifimini/fully_connected.cc + +else ifeq ($(TARGET_ARCH), hifi5) DOWNLOAD_RESULT := $(shell $(MAKEFILE_DIR)/ext_libs/xtensa_download.sh ${DOWNLOADS_DIR} hifi5 $(TENSORFLOW_ROOT)) ifneq ($(DOWNLOAD_RESULT), SUCCESS) $(error Something went wrong with the xtensa download: $(DOWNLOAD_RESULT)) @@ -124,7 +133,7 @@ else ifeq ($(TARGET_ARCH), vision_p6) INCLUDES += \ -I$(NNLIB_PATH)/flk/include \ -I$(NNLIB_PATH)/kernels/include/ \ - -I$(NNLIB_PATH)/runtime/include/ + -I$(NNLIB_PATH)/runtime/include/ LDFLAGS += -lidma else @@ -141,4 +150,10 @@ THIRD_PARTY_KERNEL_CC_SRCS += \ THIRD_PARTY_CC_HDRS += \ $(shell find $(FFT_PATH)/hifi3_fft -name "*.h") +else ifeq ($(TARGET_ARCH), hifimini) +THIRD_PARTY_KERNEL_CC_SRCS += \ + $(shell find $(FFT_PATH)/hifi2_fft -name "*.c") + +THIRD_PARTY_CC_HDRS += \ + $(shell find $(FFT_PATH)/hifi2_fft -name "*.h") endif diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/helper_functions.inc b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/helper_functions.inc index 2325aa121..ad3d44c45 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/helper_functions.inc +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/helper_functions.inc @@ -69,8 +69,16 @@ else MICROLITE_TEST_TARGETS += test_$(1) endif -test_$(1): $$($(1)_BINARY) +# For bluepill, the CI build is failing due to introduction of the +# introduction of test_run_latency.sh script. Looks at +# https://b.corp.google.com/issues/268565399#comment11 for more details. 
+ifneq ($(TARGET), bluepill) +test_$(1):$$($(1)_BINARY) + $(MAKEFILE_DIR)/test_latency_log.sh $(1) $$(TEST_SCRIPT) $$($(1)_BINARY) $$(TEST_PASS_STRING) $$(TARGET) +else +test_$(1):$$($(1)_BINARY) $$(TEST_SCRIPT) $$($(1)_BINARY) $$(TEST_PASS_STRING) $$(TARGET) +endif else run_$(1): $$($(1)_BINARY) diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/test_latency_log.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/test_latency_log.sh new file mode 100755 index 000000000..7079285aa --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/test_latency_log.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# This script is responsible for running the tests and also to log out the +# time (in seconds) it took to run the test file. It is using the linux time +# command to measure the latency. Setting the TIMEFORMAT to '%R' is providing +# us the real time latency. +# +# Called with following arguments: +# 1 - Name of the test file +# 2 - Name of the test script +# 3 - Name of the binary +# 4 - String output after all the tests are passed +# 5 - Name of the target +# The first parameter is used for logging purpose. The last four parameters are +# used to run the test. + +set -e + +TEST_FILE_NAME=${1} +TEST_SCRIPT=${2} +BINARY_NAME=${3} +TEST_PASS_STRING=${4} +TARGET_NAME=${5} + +# Output to stdout and stderr go to their normal places: +# Here we are opening 2 file descriptor, 3 and 4. FD 3 +# will redirect all the contents to stdout and 4 will +# redirect all the contents to stderr. Now when executing +# the TEST_SCRIPT command, we are redirecting all the stdout +# output of the command to FD 3 which will redirect everything +# to FD 1 (stdout) and all the stderr output of the command to +# FD 4 which will redirect everything to FD 2 (stderr). The +# output of the time command is captured in the time_log +# variable with the redirection of FD 2 (stderr) to FD 1 +# (stdout). Finally we are closing the FD 3 and 4.For more info +# https://stackoverflow.com/questions/4617489/get-values-from-time-command-via-bash-script +exec 3>&1 4>&2 +time_log=$( { TIMEFORMAT="%R"; time ${TEST_SCRIPT} ${BINARY_NAME} ${TEST_PASS_STRING} ${TARGET_NAME} 1>&3 2>&4; } 2>&1 ) # Captures time output only. 
+exec 3>&- 4>&-
+
+echo "Running ${TEST_FILE_NAME} took ${time_log} seconds"
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/project_generation/Makefile b/third_party/tflite-micro/tensorflow/lite/micro/tools/project_generation/Makefile
index a0d462190..199d57192 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/tools/project_generation/Makefile
+++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/project_generation/Makefile
@@ -109,9 +109,13 @@ clean:
 libtflm: $(LIB)
 
+HELLO_WORLD_SRCS := $(wildcard examples/hello_world/*.cc)
+HELLO_WORLD_SRCS += $(wildcard examples/hello_world/models/*.cc)
+HELLO_WORLD_INCLUDES := $(INCLUDES) -I./examples/hello_world
+
 hello_world: libtflm
 	@mkdir -p $(BINDIR)
-	$(CXX) $(CXXFLAGS) $(wildcard examples/hello_world/*.cc) $(INCLUDES) $(LIB) -o $(BINDIR)/$@
+	$(CXX) $(CXXFLAGS) $(HELLO_WORLD_SRCS) $(HELLO_WORLD_INCLUDES) $(LIB) -o $(BINDIR)/$@
 
 magic_wand: libtflm
 	@mkdir -p $(BINDIR)
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer.py b/third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer.py
new file mode 100644
index 000000000..a77f97fb8
--- /dev/null
+++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer.py
@@ -0,0 +1,222 @@
+# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+"""An experimental tool to requantize an int8 activation, int8 weight LSTM-based model to an int16 activation, int8 weight model.
+
+Steps:
+1. Convert the trained model to int8 using the TFLite converter. See https://www.tensorflow.org/lite/performance/post_training_quantization#full_integer_quantization
+2. Use this tool to requantize the int8 model to int16.
+3. Check that the requantized model matches expectations (e.g., read the conversion printout, perform inference tests).
+
+The conversion process:
+1. Requantize the ops specified in _COMPLEX_OP_REQUANTIZE_REGISTRATION using the registered function. Bias type conversion (int32 to int64) only happens here.
+2. Requantize all non-constant tensors with int8 type to int16 (and fix the quantization parameters).
+
+Run:
+bazel build tensorflow/lite/micro/tools:requantize_flatbuffer
+bazel-bin/tensorflow/lite/micro/tools/requantize_flatbuffer --int8_model_path=".tflite file path" --save_path="save path"
+
+CAVEAT:
+1. Use this tool ONLY for models that contain the LSTM layer. All other models should use the standard tflite conversion process.
+2. This is an experimental tool. ALWAYS check if the converted model matches your expectations.
+3. Add the custom op requantization function for complex ops (e.g., convolution).
+4. We assume ops not in _COMPLEX_OP_REQUANTIZE_REGISTRATION only have activation tensors (i.e., no weights and bias).
+Check the quantized model performance if you add additional ops to _TESTED_SIMPLE_OPS.
+
+"""
+import os
+
+import numpy as np
+from absl import app
+from absl import flags
+from absl import logging
+
+from tflite_micro.tensorflow.lite.tools import flatbuffer_utils
+from tflite_micro.tensorflow.lite.micro.tools import requantize_flatbuffer_utils
+from tflite_micro.tensorflow.lite.python import schema_py_generated
+
+FLAGS = flags.FLAGS
+
+flags.DEFINE_string("int8_model_path",
+                    default=None,
+                    help="The int8 model path.")
+flags.DEFINE_string("save_path",
+                    default=None,
+                    help="Path to save the requantized model.")
+
+# Key: BuiltinOperator (see tensorflow/lite/schema/schema.fbs)
+# Value: the requantize function defined in requantize_flatbuffer_utils.py
+# FULLY_CONNECTED, CONV_2D and DEPTHWISE_CONV_2D share the same requantize
+# function since they all share the same input/weight/bias configuration.
+_COMPLEX_OP_REQUANTIZE_REGISTRATION = {
+    schema_py_generated.BuiltinOperator.FULLY_CONNECTED:
+        requantize_flatbuffer_utils.requantize_fully_connected,
+    schema_py_generated.BuiltinOperator.UNIDIRECTIONAL_SEQUENCE_LSTM:
+        requantize_flatbuffer_utils.requantize_unidirectional_sequence_lstm,
+    schema_py_generated.BuiltinOperator.SOFTMAX:
+        requantize_flatbuffer_utils.requantize_softmax,
+    schema_py_generated.BuiltinOperator.CONV_2D:
+        requantize_flatbuffer_utils.requantize_fully_connected,
+    schema_py_generated.BuiltinOperator.DEPTHWISE_CONV_2D:
+        requantize_flatbuffer_utils.requantize_fully_connected,
+    schema_py_generated.BuiltinOperator.TRANSPOSE_CONV:
+        requantize_flatbuffer_utils.requantize_transpose_conv,
+}
+
+# List of tested simple operators (no weight and bias, e.g., reshape). See
+# tensorflow/lite/schema/schema.fbs for op code names.
+_TESTED_SIMPLE_OPS = [
+    schema_py_generated.BuiltinOperator.RESHAPE,
+    schema_py_generated.BuiltinOperator.QUANTIZE,
+    schema_py_generated.BuiltinOperator.DEQUANTIZE,
+    schema_py_generated.BuiltinOperator.MEAN,
+    schema_py_generated.BuiltinOperator.SQUARED_DIFFERENCE,
+    schema_py_generated.BuiltinOperator.ADD,
+    schema_py_generated.BuiltinOperator.RSQRT,
+    schema_py_generated.BuiltinOperator.MUL,
+    schema_py_generated.BuiltinOperator.SUB,
+    schema_py_generated.BuiltinOperator.LEAKY_RELU,
+    schema_py_generated.BuiltinOperator.LOGISTIC,
+    schema_py_generated.BuiltinOperator.PAD
+]
+
+_SUPPORTED_OPS = set(
+    list(_COMPLEX_OP_REQUANTIZE_REGISTRATION.keys()) + _TESTED_SIMPLE_OPS)
+
+
+class Requantizer:
+  """Requantize an int8 activation model to int16."""
+
+  def __init__(self, int8_model):
+    """Initialize the int8 to int16 converter.
+
+    Args:
+      int8_model: flatbuffer python object
+    """
+    self.model = int8_model
+    self.remaining_tensors = set()
+    for subgraph in self.model.subgraphs:
+      for tensor in subgraph.tensors:
+        self.remaining_tensors.add(tensor)
+
+  @classmethod
+  def from_file(cls, model_path):
+    """Instantiates a converter from an int8 quantized .tflite filepath.
+
+    Args:
+      model_path: Filepath to the .tflite model
+
+    Returns:
+      A Requantizer instance
+    """
+    int8_model = flatbuffer_utils.read_model(model_path)
+    return cls(int8_model)
+
+  @classmethod
+  def from_bytes(cls, bytearray):
+    """Instantiates a converter from an int8 quantized .tflite bytearray.
+
+    Args:
+      bytearray: Content of the .tflite model
+
+    Returns:
+      A Requantizer instance
+    """
+    int8_model = flatbuffer_utils.convert_bytearray_to_object(bytearray)
+    return cls(int8_model)
+
+  def _remove_tensor(self, tensor):
+    """Remove tensor from the tensor pool."""
+    if tensor in self.remaining_tensors:
+      self.remaining_tensors.remove(tensor)
+
+  def _remove_op_tensors(self, tensors, op):
+    """Remove tensors in an operator from the tensor pool.
+
+    Args:
+      tensors: tensors in the subgraph
+      op : the operator
+    """
+    for id in op.inputs:
+      # -1 means non-used tensor
+      if id != -1:
+        self._remove_tensor(tensors[id])
+    for id in op.outputs:
+      if id != -1:
+        self._remove_tensor(tensors[id])
+
+  def _convert_ops(self):
+    """Convert all ops registered in _COMPLEX_OP_REQUANTIZE_REGISTRATION from int8 to int16 (activation type)."""
+    op_codes = self.model.operatorCodes
+    for subgraph in self.model.subgraphs:
+      tensors = subgraph.tensors
+      for op in subgraph.operators:
+        op_code = op_codes[op.opcodeIndex].builtinCode
+        op_name = flatbuffer_utils.opcode_to_name(self.model, op.opcodeIndex)
+        if op_code not in _SUPPORTED_OPS:
+          raise RuntimeError(
+              f"Operator {op_name} is not supported. If the operator contains weight/bias, develop and register the corresponding requantize function in _COMPLEX_OP_REQUANTIZE_REGISTRATION. Otherwise, try adding the op code to _TESTED_SIMPLE_OPS and validate the requantized model."
+          )
+        if op_code in _COMPLEX_OP_REQUANTIZE_REGISTRATION:
+          logging.info(f"Convert operator {op_name}")
+          _COMPLEX_OP_REQUANTIZE_REGISTRATION[op_code](tensors,
+                                                       self.model.buffers, op)
+          self._remove_op_tensors(tensors, op)
+
+  def _change_tensor_activation_type(self):
+    """Change all remaining tensor types from int8 to int16."""
+    for subgraph in self.model.subgraphs:
+      for tensor in subgraph.tensors:
+        if ((tensor in self.remaining_tensors)
+            and (requantize_flatbuffer_utils.TENSOR_CODE_TYPE[tensor.type]
+                 == np.int8) and ("const" not in str(tensor.name))):
+          requantize_flatbuffer_utils.change_activation_tensor_8to16(
+              tensor, self.model.buffers)
+          self._remove_tensor(tensor)
+
+  def requantize_8to16(self):
+    '''
+    The requantize process has two phases:
+    1. Go through the registered ops and perform the custom op transformation
+    2. Go through the rest of the tensors and convert int8 non-const tensors to int16
+    '''
+
+    logging.info("Reset Operators")
+    self._convert_ops()
+    logging.info("Set Remaining Activation Types")
+    self._change_tensor_activation_type()
+    logging.info("Remaining Tensors:")
+    for tensor in self.remaining_tensors:
+      logging.info(
+          f"{tensor.name}, tensor type {flatbuffer_utils.type_to_name(tensor.type)}"
+      )
+
+  def save_model(self, output_path):
+    """Save the requantized model to a specified location."""
+    flatbuffer_utils.write_model(self.model, output_path)
+
+  def model_bytearray(self):
+    """Get the flatbuffer bytearray."""
+    return flatbuffer_utils.convert_object_to_bytearray(self.model)
+
+
+def main(_):
+  if not os.path.exists(FLAGS.int8_model_path):
+    raise ValueError(
+        "Model file does not exist. 
Please check the .tflite model path.") + requantizer = Requantizer.from_file(FLAGS.int8_model_path) + requantizer.requantize_8to16() + requantizer.save_model(FLAGS.save_path) + + +if __name__ == "__main__": + app.run(main) diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer_test.py b/third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer_test.py new file mode 100644 index 000000000..3dae5a8a2 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer_test.py @@ -0,0 +1,115 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= +import os + +import numpy as np +import tensorflow as tf + +from tensorflow.python.framework import test_util +from tensorflow.python.platform import test +from tflite_micro.tensorflow.lite.micro.tools import requantize_flatbuffer +from tflite_micro.tensorflow.lite.micro.python.interpreter.src import tflm_runtime +from tflite_micro.tensorflow.lite.tools import flatbuffer_utils + + +#TODO(b/248061370): replace the keras model creation process with flatbuffer manipulation to speed up test +def create_simple_fc_model(): + '''Create a simple model with two fully connected(fc) layers''' + model = tf.keras.models.Sequential([ + tf.keras.layers.InputLayer(input_shape=(28, 28)), + tf.keras.layers.Flatten(), + tf.keras.layers.Dense(50, activation=tf.nn.relu), + tf.keras.layers.Dense(10, activation=tf.nn.softmax, name="output") + ]) + fixed_input = tf.keras.layers.Input(shape=[28, 28], + batch_size=1, + dtype=model.inputs[0].dtype, + name="fixed_input") + fixed_output = model(fixed_input) + return tf.keras.models.Model(fixed_input, fixed_output) + + +def representative_dataset_gen(num_samples=100): + np.random.seed(42) #Seed the random number generator + for _ in range(num_samples): + yield [np.random.random((1, 28, 28)).astype(np.float32)] + + +def convert_tfl_converter(keras_model, + representative_dataset_gen, + int16=False): + '''Convert and quantize the keras model using the standard tflite converter''' + converter = tf.lite.TFLiteConverter.from_keras_model(keras_model) + converter.optimizations = [tf.lite.Optimize.DEFAULT] + converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] + if int16: + converter.target_spec.supported_ops = [ + tf.lite.OpsSet. 
+ EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8 + ] + converter.representative_dataset = representative_dataset_gen + return converter.convert() + + +def convert_8to16_requantizer(keras_model, representative_dataset_gen): + '''Convert and quantize the keras model using the int8 to int16 conversion tool''' + # Convert to int8 first + int8_model = convert_tfl_converter(keras_model, + representative_dataset_gen, + int16=False) + int8_model = flatbuffer_utils.convert_bytearray_to_object(int8_model) + # Use the tool to convert to int16 + requantizer = requantize_flatbuffer.Requantizer(int8_model) + requantizer.requantize_8to16() + return flatbuffer_utils.convert_object_to_bytearray(requantizer.model) + + +class SimpleFCModelTest(test_util.TensorFlowTestCase): + + def testCompareWithStandardConversion(self): + + def inference(tflm_interpreter, data_x): + tflm_interpreter.set_input(data_x, 0) + tflm_interpreter.invoke() + return tflm_interpreter.get_output(0) + + keras_model = create_simple_fc_model( + ) # int16 fc is supported in tflite converter + tfl_converted_int16_model = convert_tfl_converter( + keras_model, representative_dataset_gen, int16=True) + int8_converted_int16_model = convert_8to16_requantizer( + keras_model, representative_dataset_gen) + + interpreter_tfl_converted = tflm_runtime.Interpreter.from_bytes( + tfl_converted_int16_model) + interpreter_tool_converted = tflm_runtime.Interpreter.from_bytes( + int8_converted_int16_model) + + num_steps = 10 + # Give the same (random) input to both interpreters to confirm that the outputs are similar. + for _ in range(0, num_steps): + data_x = np.random.random((1, 28, 28)).astype("float32") + + tfl_converted_result = inference(interpreter_tfl_converted, data_x)[0] + tool_converted_result = inference(interpreter_tool_converted, data_x)[0] + + max_diff = max(abs(tool_converted_result - tfl_converted_result)) + self.assertLess( + max_diff, 1e-4 + ) # can not be the same since int8 model loses some range information + + +if __name__ == "__main__": + test.main() diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer_utils.py b/third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer_utils.py new file mode 100644 index 000000000..5709ff2cf --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/requantize_flatbuffer_utils.py @@ -0,0 +1,325 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= +import numpy as np +from absl import logging +from tflite_micro.tensorflow.lite.python.schema_py_generated import TensorType + +# Map flatbuffer tensor type code to numpy data type. 
see Table TensorType in tensorflow/lite/schema/schema.fbs +# TODO(b/269487423): use a common util function instead +TENSOR_CODE_TYPE = { + TensorType.FLOAT32: np.float32, + TensorType.FLOAT16: np.float16, + TensorType.INT32: np.int32, + TensorType.UINT8: np.uint8, + TensorType.INT64: np.int64, + TensorType.STRING: np.string_, + TensorType.BOOL: np.bool_, + TensorType.INT16: np.int16, + TensorType.COMPLEX64: np.complex64, + TensorType.INT8: np.int8, + TensorType.FLOAT64: np.float64, + TensorType.COMPLEX128: np.complex128, + TensorType.UINT64: np.uint64, + TensorType.RESOURCE: "RESOURCE", + TensorType.VARIANT: "VARIANT", + TensorType.UINT32: np.uint32, + TensorType.UINT16: np.uint16, + TensorType.INT4: "INT4", +} + +# TODO(b/269487423): use a common util function instead +TENSOR_TYPE_CODE = dict((reversed(item) for item in TENSOR_CODE_TYPE.items())) + + +def clip_range(vals, bit_width): + """Mimic integer calculation. + + Clip the range of vals based on bit width. + + e.g., clip_range([300], 8) = [127] since int8 have range [-128, 127] + + Args: + vals (np.array): float representation of the integer values + bit_width (int): number of desired bits for vals + + Returns: + np.array : clipped vals + """ + # Numpy integer calculation does not do saturation. Implement here + min_val = -2**(bit_width - 1) + max_val = 2**(bit_width - 1) - 1 + if vals.max() > max_val or vals.min() < min_val: + logging.info(f"WARNING: integer overflow!") + return np.clip(vals, min_val, max_val) + + +def quantize_data(data, scale, zero_point=0, bit_width=8): + """Quantize the data to integer type with desired bit width. + + The quantized data is represented using float since integer calculation in + numpy may differ from other implementations (e.g., no integer saturation + protection in numpy) + + Args: + data (np.array): float data + scale (float): quantization scale of the data + zero_point (integer): quantization zero point of the data + bit_width (int): number of representative bits for vals + + Returns: + np.array : quantized data in float but clipped range + """ + vals = np.round(data / scale) + zero_point + return clip_range(vals, bit_width) + + +def dequantize_data(quantized_data, scale, zero_point=0): + """Dequantize the data to integer type with desired bit width. + + Args: + quantized_data (np.array): quantized data + scale (float): quantization scale of the data + zero_point (integer): quantization zero point of the data + + Returns: + np.array : dequantized data + """ + return scale * (quantized_data - zero_point) + + +def change_quantization_settings_8to16(tensor, buffers): + """Change the quantization seeting of the tensor from int8 to int16""" + + if (tensor.quantization.quantizedDimension != 0): + raise RuntimeError( + "Only layer level quantization is supported. 
Per channel quantization is not supported yet."
+    )
+
+  scale = tensor.quantization.scale[0]
+  zero_point = tensor.quantization.zeroPoint[0]
+
+  # Set MAX_INT8 from 127 to 128 to compensate for the range precision loss due to int8 quantization
+  MIN_INT8, MAX_INT8 = -128, 128
+  # Narrow range (-min == max) is used for symmetrical quantization
+  MIN_INT16, MAX_INT16 = -32767, 32767
+
+  # Asymmetrically quantized: scale * (qmax - zero_point) = rmax
+  rmax = scale * (MAX_INT8 - zero_point)
+  rmin = scale * (MIN_INT8 - zero_point)
+  # Symmetrically quantized: scale * qmax = rmax
+  scale_16 = max(abs(rmax), abs(rmin)) / abs(MIN_INT16)
+  # Change scale: symmetrically quantized
+  tensor.quantization.scale = [scale_16]
+  tensor.quantization.zeroPoint = [0]
+
+  # Requantize the buffer data to int16 if necessary
+  tensor_buffer = buffers[tensor.buffer]
+  if tensor_buffer.data is not None:
+    expected_buffer_size = np.prod(tensor.shape)
+    data = np.frombuffer(tensor_buffer.data, dtype=np.int8)
+    # Different ops may share one buffer. No need to requantize the buffer
+    # if the buffer has already been processed to int16 (2 bytes)
+    if data.nbytes == expected_buffer_size * 2:
+      return
+    elif data.nbytes != expected_buffer_size:
+      raise RuntimeError(
+          f"Buffer size {data.nbytes} does not match the expected size {expected_buffer_size}"
+      )
+    # Dequantize with the original int8 scale/zero point, then requantize
+    # using the new int16 scale
+    dequantized_data = dequantize_data(data, scale, zero_point)
+    int16_data = quantize_data(dequantized_data, scale_16, 0,
+                               16).astype(np.int16)
+    tensor_buffer.data = int16_data.tobytes()
+
+
+def change_activation_tensor_8to16(tensor, buffers):
+  """Change the quantization setting of an activation tensor from int8 to int16."""
+  if tensor.type == TENSOR_TYPE_CODE[np.int8]:
+    change_quantization_settings_8to16(tensor, buffers)
+    tensor.type = TENSOR_TYPE_CODE[np.int16]
+    logging.info(f"Set {tensor.name} from int8 to int16")
+
+
+def requantize_bias_perlayer(buffers, input, weight, bias):
+  """Bias is layer-wise quantized."""
+  bias_buffer = buffers[bias.buffer]
+  bias_scale = bias.quantization.scale[0]
+  bias_zero_pt = bias.quantization.zeroPoint[0]
+  data = np.frombuffer(bias_buffer.data, dtype=np.int32)
+
+  # Change scale and zero point
+  bias_scale_int64 = (input.quantization.scale[0] *
+                      weight.quantization.scale[0])
+  bias_zero_pt_int64 = 0  # symmetrically quantized
+  bias.type = TENSOR_TYPE_CODE[np.int64]
+  bias.quantization.scale = [bias_scale_int64]
+  bias.quantization.zeroPoint = [bias_zero_pt_int64]
+
+  expected_buffer_size = bias.shape[0]  # bias has only one dimension
+  # Different ops may share one buffer. No need to requantize the buffer
+  # if the buffer has already been processed to int64 (8 bytes)
+  if data.nbytes == expected_buffer_size * 8:
+    return
+  elif data.nbytes != expected_buffer_size * 4:
+    raise RuntimeError(
+        f"Bias buffer size {data.nbytes} does not match the expected size {expected_buffer_size * 4}"
+    )
+  dequantized_data = dequantize_data(data, bias_scale, bias_zero_pt)
+  int64_data = quantize_data(dequantized_data, bias_scale_int64,
+                             bias_zero_pt_int64, 64).astype(np.int64)
+  bias_buffer.data = int64_data.tobytes()
+
+
+def requantize_bias_perchannel(buffers, input, weight, bias):
+  """Bias is channel-wise quantized; requantize the biases one by one."""
+  bias_buffer = buffers[bias.buffer]
+  data = np.frombuffer(bias_buffer.data, dtype=np.int32)
+  expected_buffer_size = bias.shape[0]  # bias has only one dimension
+  # Whether to requantize the bias buffer; False if the buffer has already been requantized
+  requantize_buffer = True
+  # Different ops may share one buffer. No need to requantize the buffer
+  # if the buffer has already been processed to int64 (8 bytes)
+  if data.nbytes == expected_buffer_size * 8:
+    requantize_buffer = False
+  elif data.nbytes != expected_buffer_size * 4:
+    raise RuntimeError(
+        f"Bias buffer size {data.nbytes} does not match the expected size {expected_buffer_size * 4}"
+    )
+  if len(bias.quantization.scale) != len(weight.quantization.scale):
+    raise RuntimeError(
+        "Per channel quantization requires the number of bias scales "
+        f"({len(bias.quantization.scale)}) to equal the number of weight "
+        f"scales ({len(weight.quantization.scale)})")
+  requantized_data = []
+  requantized_scales = []
+  requantized_zero_points = []
+  for element_data, bias_scale, weight_scale, bias_zero_point in zip(
+      data, bias.quantization.scale, weight.quantization.scale,
+      bias.quantization.zeroPoint):
+    bias_scale_int64 = (input.quantization.scale[0] * weight_scale)
+    bias_zero_pt_int64 = 0  # symmetrically quantized
+    requantized_scales.append(bias_scale_int64)
+    requantized_zero_points.append(bias_zero_pt_int64)
+
+    if requantize_buffer:
+      dequantized_data = dequantize_data(element_data, bias_scale,
+                                         bias_zero_point)
+      int64_data = quantize_data(dequantized_data, bias_scale_int64,
+                                 bias_zero_pt_int64, 64).astype(np.int64)
+      requantized_data.append(int64_data)
+
+  bias.type = TENSOR_TYPE_CODE[np.int64]
+  bias.quantization.scale = requantized_scales
+  bias.quantization.zeroPoint = requantized_zero_points
+  if requantize_buffer:
+    bias_buffer.data = np.array(requantized_data).tobytes()
+
+
+def set_bias_type_int64(buffers, input, weight, bias):
+  """Set the bias tensor quantization setting from int32 to int64.
+
+  Args:
+    buffers (list): buffers for the model
+    input (Tensor): the corresponding input tensor for the bias
+    weight (Tensor): the corresponding weight tensor for the bias
+    bias (Tensor): the bias tensor that needs to be modified
+  """
+  if bias.type == TENSOR_TYPE_CODE[np.int32]:
+    if len(bias.quantization.scale) == 1:
+      requantize_bias_perlayer(buffers, input, weight, bias)
+    else:
+      requantize_bias_perchannel(buffers, input, weight, bias)
+
+
+def requantize_fully_connected(tensors, buffers, op):
+  """Requantize the fully connected op from int8 to int16.
+
+  Note: CONV_2D and DEPTHWISE_CONV_2D also use this requantize function since
+  they all share the same input/weight/bias configuration.
+  See tensorflow/lite/micro/kernels/fully_connected_common.cc
+      tensorflow/lite/micro/kernels/depthwise_conv_common.cc
+      tensorflow/lite/micro/kernels/conv_common.cc
+  """
+  # Indices are from tensorflow/lite/micro/kernels/fully_connected_common.cc
+  input_tensor = tensors[op.inputs[0]]
+  # Weight stays the same, no change needed
+  weight_tensor = tensors[op.inputs[1]]
+  output_tensor = tensors[op.outputs[0]]
+
+  change_activation_tensor_8to16(input_tensor, buffers)
+  change_activation_tensor_8to16(output_tensor, buffers)
+  # If the bias does not exist, op.inputs[2] == -1
+  if op.inputs[2] != -1:
+    bias_tensor = tensors[op.inputs[2]]
+    set_bias_type_int64(buffers, input_tensor, weight_tensor, bias_tensor)
+
+
+def requantize_unidirectional_sequence_lstm(tensors, buffers, op):
+  """Requantize the unidirectional sequence LSTM op from int8 to int16."""
+  input_tensor = tensors[op.inputs[0]]
+  hidden_state_tensor = tensors[op.inputs[18]]
+  output_tensor = tensors[op.outputs[0]]
+
+  # Indices are from tensorflow/lite/micro/kernels/lstm_shared.h
+  input_weights_idx = [1, 2, 3, 4]
+  recurrent_weights_idx = [5, 6, 7, 8]
+  bias_idx = [12, 13, 14, 15]
+
+  change_activation_tensor_8to16(input_tensor, buffers)
+  change_activation_tensor_8to16(hidden_state_tensor, buffers)
+  change_activation_tensor_8to16(output_tensor, buffers)
+
+  for weight_id, bias_id in zip(input_weights_idx, bias_idx):
+    weight_tensor = tensors[op.inputs[weight_id]]
+    bias_tensor = tensors[op.inputs[bias_id]]
+    set_bias_type_int64(buffers, input_tensor, weight_tensor, bias_tensor)
+
+  # Recurrent weights stay int8 and have no associated biases
+  for weight_id in recurrent_weights_idx:
+    weight_tensor = tensors[op.inputs[weight_id]]
+
+
+def requantize_softmax(tensors, buffers, op):
+  """Requantize the softmax op from int8 to int16."""
+  input_tensor = tensors[op.inputs[0]]
+  output_tensor = tensors[op.outputs[0]]
+
+  # Change input type
+  change_activation_tensor_8to16(input_tensor, buffers)
+
+  # Output range is always [0, 1]
+  if output_tensor.type == TENSOR_TYPE_CODE[np.int8]:
+    # Change quantization settings
+    output_tensor.quantization.scale = [1 / 32768]
+    output_tensor.quantization.zeroPoint = [0]
+    # Set tensor type
+    output_tensor.type = TENSOR_TYPE_CODE[np.int16]
+    logging.info(f"Set {output_tensor.name} from int8 to int16")
+
+
+def requantize_transpose_conv(tensors, buffers, op):
+  """Requantize the transpose conv op from int8 to int16."""
+  # Indices are from tensorflow/lite/micro/kernels/transpose_conv.cc
+  input_tensor = tensors[op.inputs[2]]
+  # Weight stays the same, no change needed
+  weight_tensor = tensors[op.inputs[1]]
+  output_tensor = tensors[op.outputs[0]]
+
+  change_activation_tensor_8to16(input_tensor, buffers)
+  change_activation_tensor_8to16(output_tensor, buffers)
+  # If the bias does not exist, op.inputs[3] == -1
+  if len(op.inputs) > 3:
+    if op.inputs[3] != -1:
+      bias_tensor = tensors[op.inputs[3]]
+      set_bias_type_int64(buffers, input_tensor, weight_tensor, bias_tensor)
\ No newline at end of file
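
For reference, the Requantizer added above can also be driven directly from Python rather than through the bazel-built CLI, which is how the new requantize_flatbuffer_test.py exercises it. A minimal sketch; the file names below are placeholders, not part of this patch:

    from tflite_micro.tensorflow.lite.micro.tools import requantize_flatbuffer

    # Load an int8-quantized flatbuffer, requantize its activations to int16,
    # and write the result back out (both paths are illustrative only).
    requantizer = requantize_flatbuffer.Requantizer.from_file("model_int8.tflite")
    requantizer.requantize_8to16()
    requantizer.save_model("model_int16.tflite")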