diff --git a/conf/tflite-micro.version b/conf/tflite-micro.version index 9874f722c..a7b3835fb 100644 --- a/conf/tflite-micro.version +++ b/conf/tflite-micro.version @@ -1 +1 @@ -8746ec9 +9eb4fb1 diff --git a/third_party/tflite-micro/tensorflow/lite/core/c/common.cc b/third_party/tflite-micro/tensorflow/lite/core/c/common.cc index 827312b45..602d9e078 100644 --- a/third_party/tflite-micro/tensorflow/lite/core/c/common.cc +++ b/third_party/tflite-micro/tensorflow/lite/core/c/common.cc @@ -219,11 +219,11 @@ TfLiteStatus TfLiteTensorCopy(const TfLiteTensor* src, TfLiteTensor* dst) { return kTfLiteOk; } -void TfLiteTensorResizeMaybeCopy(size_t num_bytes, TfLiteTensor* tensor, - bool preserve_data) { +TfLiteStatus TfLiteTensorResizeMaybeCopy(size_t num_bytes, TfLiteTensor* tensor, + bool preserve_data) { if (tensor->allocation_type != kTfLiteDynamic && tensor->allocation_type != kTfLitePersistentRo) { - return; + return kTfLiteOk; } #ifdef TF_LITE_TENSORFLOW_PROFILER tflite::PauseHeapMonitoring(/*pause=*/true); @@ -258,9 +258,15 @@ void TfLiteTensorResizeMaybeCopy(size_t num_bytes, TfLiteTensor* tensor, tflite::PauseHeapMonitoring(/*pause=*/false); #endif tensor->bytes = num_bytes; + if (tensor->data.data == nullptr && num_bytes != 0) { + // We are done allocating but tensor is pointing to null and a valid size + // was requested, so we error. + return kTfLiteError; + } + return kTfLiteOk; } -void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor) { +TfLiteStatus TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor) { return TfLiteTensorResizeMaybeCopy(num_bytes, tensor, true); } #endif // TF_LITE_STATIC_MEMORY @@ -331,4 +337,18 @@ void TfLiteOpaqueDelegateDelete(TfLiteOpaqueDelegate* opaque_delegate) { delete tflite_delegate; } +void* TfLiteOpaqueDelegateGetData(const TfLiteOpaqueDelegate* delegate) { + if (!delegate) return nullptr; + + // The following cast is safe only because this code is part of the + // TF Lite runtime implementation. Apps using TF Lite should not rely on + // 'TfLiteOpaqueDelegate' and 'TfLiteDelegate' being equivalent. + const auto* tflite_delegate = + reinterpret_cast(delegate); + + if (!tflite_delegate->opaque_delegate_builder) return nullptr; + + return tflite_delegate->opaque_delegate_builder->data; +} + } // extern "C" diff --git a/third_party/tflite-micro/tensorflow/lite/core/c/common.h b/third_party/tflite-micro/tensorflow/lite/core/c/common.h index 46d5e650a..cf96a1163 100644 --- a/third_party/tflite-micro/tensorflow/lite/core/c/common.h +++ b/third_party/tflite-micro/tensorflow/lite/core/c/common.h @@ -42,6 +42,7 @@ limitations under the License. #ifndef TENSORFLOW_LITE_CORE_C_COMMON_H_ #define TENSORFLOW_LITE_CORE_C_COMMON_H_ +#include #include #include #include @@ -179,10 +180,23 @@ void TfLiteFloatArrayFree(TfLiteFloatArray* a); (context)->ReportError((context), __VA_ARGS__); \ } \ } while (false) +#define TF_LITE_OPAQUE_KERNEL_LOG(opaque_context, ...) \ + do { \ + TfLiteOpaqueContextReportError((opaque_context), __VA_ARGS__); \ + } while (false) +#define TF_LITE_OPAQUE_MAYBE_KERNEL_LOG(opaque_context, ...) \ + do { \ + if ((opaque_context) != nullptr) { \ + TfLiteOpaqueContextReportError((opaque_context), __VA_ARGS__); \ + } \ + } while (false) #else // TF_LITE_STRIP_ERROR_STRINGS #define ARGS_UNUSED(...) (void)sizeof(#__VA_ARGS__) #define TF_LITE_KERNEL_LOG(context, ...) ARGS_UNUSED(__VA_ARGS__) #define TF_LITE_MAYBE_KERNEL_LOG(context, ...) ARGS_UNUSED(__VA_ARGS__) +#define TF_LITE_OPAQUE_KERNEL_LOG(opaque_context, ...) 
ARGS_UNUSED(__VA_ARGS__) +#define TF_LITE_OPAQUE_MAYBE_KERNEL_LOG(opaque_context, ...) \ + ARGS_UNUSED(__VA_ARGS__) #endif // TF_LITE_STRIP_ERROR_STRINGS // Check whether value is true, and if not return kTfLiteError from @@ -195,6 +209,15 @@ void TfLiteFloatArrayFree(TfLiteFloatArray* a); } \ } while (0) +// Check whether value is true, and if not return kTfLiteError from +// the current function (and report the error string msg). +#define TF_LITE_OPAQUE_ENSURE_MSG(opaque_context, value, msg) \ + do { \ + if (!(value)) { \ + TF_LITE_OPAQUE_KERNEL_LOG((opaque_context), __FILE__ " " msg); \ + return kTfLiteError; \ + } \ + } while (0) // Check whether the value `a` is true, and if not return kTfLiteError from // the current function, while also reporting the location of the error. #define TF_LITE_ENSURE(context, a) \ @@ -206,6 +229,16 @@ void TfLiteFloatArrayFree(TfLiteFloatArray* a); } \ } while (0) +// Check whether the value `a` is true, and if not return kTfLiteError from +// the current function, while also reporting the location of the error. +#define TF_LITE_OPAQUE_ENSURE(opaque_context, a) \ + do { \ + if (!(a)) { \ + TF_LITE_OPAQUE_KERNEL_LOG(opaque_context, "%s:%d: %s was not true.", \ + __FILE__, __LINE__, #a); \ + return kTfLiteError; \ + } \ + } while (0) #define TF_LITE_ENSURE_STATUS(a) \ do { \ const TfLiteStatus s = (a); \ @@ -228,6 +261,19 @@ void TfLiteFloatArrayFree(TfLiteFloatArray* a); } \ } while (0) +// Check whether the value `a == b` is true, and if not return kTfLiteError from +// the current function, while also reporting the location of the error. +// `a` and `b` may be evaluated more than once, so no side effects or +// extremely expensive computations should be done. +// NOTE: Use TF_LITE_ENSURE_TYPES_EQ if comparing TfLiteTypes. +#define TF_LITE_OPAQUE_ENSURE_EQ(opaque_context, a, b) \ + do { \ + if ((a) != (b)) { \ + TF_LITE_OPAQUE_KERNEL_LOG((opaque_context), "%s:%d %s != %s (%d != %d)", \ + __FILE__, __LINE__, #a, #b, (a), (b)); \ + return kTfLiteError; \ + } \ + } while (0) #define TF_LITE_ENSURE_TYPES_EQ(context, a, b) \ do { \ if ((a) != (b)) { \ @@ -238,6 +284,15 @@ void TfLiteFloatArrayFree(TfLiteFloatArray* a); } \ } while (0) +#define TF_LITE_OPAQUE_ENSURE_TYPES_EQ(opaque_context, a, b) \ + do { \ + if ((a) != (b)) { \ + TF_LITE_OPAQUE_KERNEL_LOG((opaque_context), "%s:%d %s != %s (%s != %s)", \ + __FILE__, __LINE__, #a, #b, \ + TfLiteTypeGetName(a), TfLiteTypeGetName(b)); \ + return kTfLiteError; \ + } \ + } while (0) #define TF_LITE_ENSURE_NEAR(context, a, b, epsilon) \ do { \ auto delta = ((a) > (b)) ? ((a) - (b)) : ((b) - (a)); \ @@ -249,6 +304,16 @@ void TfLiteFloatArrayFree(TfLiteFloatArray* a); } \ } while (0) +#define TF_LITE_OPAQUE_ENSURE_NEAR(opaque_context, a, b, epsilon) \ + do { \ + auto delta = ((a) > (b)) ? ((a) - (b)) : ((b) - (a)); \ + if (delta > epsilon) { \ + TF_LITE_OPAQUE_KERNEL_LOG( \ + (opaque_context), "%s:%d %s not near %s (%f != %f)", __FILE__, \ + __LINE__, #a, #b, static_cast(a), static_cast(b)); \ + return kTfLiteError; \ + } \ + } while (0) #define TF_LITE_ENSURE_OK(context, status) \ do { \ const TfLiteStatus s = (status); \ @@ -648,23 +713,26 @@ void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims, TfLiteStatus TfLiteTensorCopy(const TfLiteTensor* src, TfLiteTensor* dst); // Change the size of the memory block owned by `tensor` to `num_bytes`. -// Tensors with allocation types other than kTfLiteDynamic will be ignored. 
+// Tensors with allocation types other than `kTfLiteDynamic` will be ignored and +// a kTfLiteOk will be returned. // `tensor`'s internal data buffer will be assigned a pointer // which can safely be passed to free or realloc if `num_bytes` is zero. -// Behaviour is undefined if `tensor` is NULL. // If `preserve_data` is true, tensor data will be unchanged in the range from -// the start of the region up to the minimum of the old and new sizes. -void TfLiteTensorResizeMaybeCopy(size_t num_bytes, TfLiteTensor* tensor, - bool preserve_data); +// the start of the region up to the minimum of the old and new sizes. In the +// case of NULL tensor, or an error allocating new memory, returns +// `kTfLiteError`. +TfLiteStatus TfLiteTensorResizeMaybeCopy(size_t num_bytes, TfLiteTensor* tensor, + bool preserve_data); // Change the size of the memory block owned by `tensor` to `num_bytes`. -// Tensors with allocation types other than kTfLiteDynamic will be ignored. +// Tensors with allocation types other than kTfLiteDynamic will be ignored and +// a kTfLiteOk will be returned. // `tensor`'s internal data buffer will be assigned a pointer // which can safely be passed to free or realloc if `num_bytes` is zero. -// Behaviour is undefined if `tensor` is NULL. // Tensor data will be unchanged in the range from the start of the region up to -// the minimum of the old and new sizes. -void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor); +// the minimum of the old and new sizes. In the case +// of NULL tensor, or an error allocating new memory, returns `kTfLiteError`. +TfLiteStatus TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor); #endif // TF_LITE_STATIC_MEMORY // WARNING: This is an experimental interface that is subject to change. @@ -1135,6 +1203,17 @@ TfLiteOpaqueDelegate* TfLiteOpaqueDelegateCreate( // 'delegate' is a null pointer. void TfLiteOpaqueDelegateDelete(TfLiteOpaqueDelegate* delegate); +// Returns a pointer to the data associated with the provided opaque 'delegate'. +// +// A null pointer will be returned when: +// - The 'delegate' is null. +// - The 'data' field of the 'TfLiteOpaqueDelegateBuilder' used to construct the +// 'delegate' was null. +// - Or in case of any other error. +// - The 'delegate' has been constructed via a 'TfLiteOpaqueDelegateBuilder', +// but the 'data' field of the 'TfLiteOpaqueDelegateBuilder' is null. +void* TfLiteOpaqueDelegateGetData(const TfLiteOpaqueDelegate* delegate); + #ifdef __cplusplus } // extern "C" #endif // __cplusplus diff --git a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h index 77c766d25..cc881a3af 100644 --- a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h +++ b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h @@ -29,14 +29,15 @@ namespace reference_integer_ops { // zero_point (params.weights_offset) is always 0. // However, for per-tensor functions, params.weights_offset is still applied for // backward compatibility. 
-
-inline void FullyConnectedPerChannel(
+template <typename InputType, typename WeightType, typename BiasType,
+          typename OutputType>
+void FullyConnectedPerChannel(
     const FullyConnectedParams& params, const int32_t* output_multiplier,
     const int* output_shift, const RuntimeShape& input_shape,
-    const int8_t* input_data, const RuntimeShape& filter_shape,
-    const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const int32_t* bias_data, const RuntimeShape& output_shape,
-    int8_t* output_data) {
+    const InputType* input_data, const RuntimeShape& filter_shape,
+    const WeightType* filter_data, const RuntimeShape& bias_shape,
+    const BiasType* bias_data, const RuntimeShape& output_shape,
+    OutputType* output_data) {
   const int32_t input_offset = params.input_offset;
   const int32_t output_offset = params.output_offset;
   const int32_t output_activation_min = params.quantized_activation_min;
@@ -52,7 +53,7 @@ inline void FullyConnectedPerChannel(
   const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
   for (int b = 0; b < batches; ++b) {
     for (int out_c = 0; out_c < output_depth; ++out_c) {
-      int32_t acc = 0;
+      BiasType acc = 0;
       for (int d = 0; d < accum_depth; ++d) {
         int32_t input_val = input_data[b * accum_depth + d];
         int32_t filter_val = filter_data[out_c * accum_depth + d];
@@ -61,62 +62,26 @@ inline void FullyConnectedPerChannel(
       if (bias_data) {
         acc += bias_data[out_c];
       }
-      acc = MultiplyByQuantizedMultiplier(acc, output_multiplier[out_c],
-                                          output_shift[out_c]);
-      acc += output_offset;
-      acc = std::max(acc, output_activation_min);
-      acc = std::min(acc, output_activation_max);
-      output_data[out_c + output_depth * b] = static_cast<int8_t>(acc);
-    }
-  }
-}
-
-template <typename AccumScalar>
-inline void FullyConnectedPerChannel(
-    const FullyConnectedParams& params, const int32_t* output_multiplier,
-    const int* output_shift, const RuntimeShape& input_shape,
-    const int16_t* input_data, const RuntimeShape& filter_shape,
-    const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const AccumScalar* bias_data, const RuntimeShape& output_shape,
-    int16_t* output_data) {
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
-  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
-
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  const int filter_dim_count = filter_shape.DimensionsCount();
-  const int output_dim_count = output_shape.DimensionsCount();
-  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
-  const int output_depth = output_shape.Dims(output_dim_count - 1);
-  TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
-  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
-  for (int b = 0; b < batches; ++b) {
-    for (int out_c = 0; out_c < output_depth; ++out_c) {
-      AccumScalar acc = 0;
-      for (int d = 0; d < accum_depth; ++d) {
-        int32_t input_val = input_data[b * accum_depth + d];
-        int32_t filter_val = filter_data[out_c * accum_depth + d];
-        acc += filter_val * input_val;
-      }
-      if (bias_data) {
-        acc += bias_data[out_c];
-      }
       int32_t acc_scaled = MultiplyByQuantizedMultiplier(
           acc, output_multiplier[out_c], output_shift[out_c]);
+      acc_scaled += output_offset;
       acc_scaled = std::max(acc_scaled, output_activation_min);
       acc_scaled = std::min(acc_scaled, output_activation_max);
-      output_data[out_c + output_depth * b] = static_cast<int16_t>(acc_scaled);
+      output_data[out_c + output_depth * b] =
+          static_cast<OutputType>(acc_scaled);
     }
   }
 }
 
-inline void FullyConnected(
-    const FullyConnectedParams& params,
const RuntimeShape& input_shape, - const int8_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, const RuntimeShape& bias_shape, - const int32_t* bias_data, const RuntimeShape& output_shape, - int8_t* output_data) { +template +void FullyConnected(const FullyConnectedParams& params, + const RuntimeShape& input_shape, + const InputType* input_data, + const RuntimeShape& filter_shape, + const WeightType* filter_data, + const RuntimeShape& bias_shape, const BiasType* bias_data, + const RuntimeShape& output_shape, OutputType* output_data) { const int32_t input_offset = params.input_offset; const int32_t filter_offset = params.weights_offset; const int32_t output_offset = params.output_offset; @@ -136,7 +101,7 @@ inline void FullyConnected( const int accum_depth = filter_shape.Dims(filter_dim_count - 1); for (int b = 0; b < batches; ++b) { for (int out_c = 0; out_c < output_depth; ++out_c) { - int32_t acc = 0; + BiasType acc = 0; for (int d = 0; d < accum_depth; ++d) { int32_t input_val = input_data[b * accum_depth + d]; int32_t filter_val = filter_data[out_c * accum_depth + d]; @@ -145,11 +110,13 @@ inline void FullyConnected( if (bias_data) { acc += bias_data[out_c]; } - acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); - acc += output_offset; - acc = std::max(acc, output_activation_min); - acc = std::min(acc, output_activation_max); - output_data[out_c + output_depth * b] = static_cast(acc); + int32_t acc_scaled = + MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); + acc_scaled += output_offset; + acc_scaled = std::max(acc_scaled, output_activation_min); + acc_scaled = std::min(acc_scaled, output_activation_max); + output_data[out_c + output_depth * b] = + static_cast(acc_scaled); } } } @@ -168,48 +135,6 @@ inline void FullyConnectedWithPackedInt4Weights( output_data); } -template -inline void FullyConnected( - const FullyConnectedParams& params, const RuntimeShape& input_shape, - const int16_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, const RuntimeShape& bias_shape, - const AccumScalar* bias_data, const RuntimeShape& output_shape, - int16_t* output_data) { - const int32_t filter_offset = params.weights_offset; - const int32_t output_multiplier = params.output_multiplier; - const int output_shift = params.output_shift; - const int32_t output_activation_min = params.quantized_activation_min; - const int32_t output_activation_max = params.quantized_activation_max; - TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2); - TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1); - - TFLITE_DCHECK_LE(output_activation_min, output_activation_max); - const int filter_dim_count = filter_shape.DimensionsCount(); - const int output_dim_count = output_shape.DimensionsCount(); - const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1); - const int output_depth = output_shape.Dims(output_dim_count - 1); - TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2)); - const int accum_depth = filter_shape.Dims(filter_dim_count - 1); - for (int b = 0; b < batches; ++b) { - for (int out_c = 0; out_c < output_depth; ++out_c) { - AccumScalar acc = 0; - for (int d = 0; d < accum_depth; ++d) { - int32_t input_val = input_data[b * accum_depth + d]; - int32_t filter_val = filter_data[out_c * accum_depth + d]; - acc += (filter_val + filter_offset) * input_val; - } - if (bias_data) { - acc += bias_data[out_c]; - } - int32_t acc_scaled = - MultiplyByQuantizedMultiplier(acc, output_multiplier, 
output_shift); - acc_scaled = std::max(acc_scaled, output_activation_min); - acc_scaled = std::min(acc_scaled, output_activation_max); - output_data[out_c + output_depth * b] = static_cast(acc_scaled); - } - } -} - } // namespace reference_integer_ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h index 22e897409..05066184c 100644 --- a/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h +++ b/third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h @@ -24,10 +24,10 @@ limitations under the License. namespace tflite { namespace reference_integer_ops { -template -inline void MulElementwise(int size, const ArithmeticParams& params, - const T* input1_data, const T* input2_data, - T* output_data) { +template +void MulElementwise(int size, const ArithmeticParams& params, + const InputType* input1_data, const InputType* input2_data, + OutputType* output_data) { for (int i = 0; i < size; ++i) { const int32_t input1_val = params.input1_offset + input1_data[i]; const int32_t input2_val = params.input2_offset + input2_data[i]; @@ -39,7 +39,7 @@ inline void MulElementwise(int size, const ArithmeticParams& params, const int32_t clamped_output = std::min(params.quantized_activation_max, std::max(params.quantized_activation_min, unclamped_result)); - output_data[i] = static_cast(clamped_output); + output_data[i] = static_cast(clamped_output); } } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval_test.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval_test.h new file mode 100644 index 000000000..9481b09c3 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/lstm_eval_test.h @@ -0,0 +1,690 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_TEST_COMMOM_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_TEST_COMMOM_H_ +#include + +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" +#include "tensorflow/lite/kernels/internal/quantization_util.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/lstm_eval.h" +#include "tensorflow/lite/micro/kernels/lstm_shared.h" +#include "tensorflow/lite/micro/kernels/testdata/lstm_test_data.h" +#include "tensorflow/lite/micro/test_helpers.h" +#include "tensorflow/lite/micro/testing/micro_test.h" + +namespace tflite { +namespace testing { + +// IntegerLstmParameter is required by the legend int8 code. 
Not required for +// the generalized standard LSTM (e.g., 16bits activation case) +template +IntegerLstmParameter CreateIntegerParameter( + const LstmNodeContents& + quantized_node_contents) { + IntegerLstmParameter evaluation_params; + double effective_scale; + int buffer_shift_output; + + const auto quantization_settings = + quantized_node_contents.QuantizationSettings(); + effective_scale = quantization_settings.input.scale * + quantization_settings.forget_gate.activation_weight.scale / + quantization_settings.nonlinear_activation_input_scale; + QuantizeMultiplier(effective_scale, + &evaluation_params.effective_input_to_forget_scale_a, + &buffer_shift_output); + evaluation_params.effective_input_to_forget_scale_b = buffer_shift_output; + effective_scale = quantization_settings.output.scale * + quantization_settings.forget_gate.recurrent_weight.scale / + quantization_settings.nonlinear_activation_input_scale; + QuantizeMultiplier(effective_scale, + &evaluation_params.effective_recurrent_to_forget_scale_a, + &buffer_shift_output); + evaluation_params.effective_recurrent_to_forget_scale_b = buffer_shift_output; + // Set effective bias + evaluation_params.input_to_forget_effective_bias = const_cast( + quantized_node_contents.ForgetGateData().activation_zp_folded_bias); + evaluation_params.recurrent_to_forget_effective_bias = const_cast( + quantized_node_contents.ForgetGateData().recurrent_zp_folded_bias); + + // input gate + effective_scale = quantization_settings.input.scale * + quantization_settings.input_gate.activation_weight.scale / + quantization_settings.nonlinear_activation_input_scale; + QuantizeMultiplier(effective_scale, + &evaluation_params.effective_input_to_input_scale_a, + &buffer_shift_output); + evaluation_params.effective_input_to_input_scale_b = buffer_shift_output; + effective_scale = quantization_settings.output.scale * + quantization_settings.input_gate.recurrent_weight.scale / + quantization_settings.nonlinear_activation_input_scale; + QuantizeMultiplier(effective_scale, + &evaluation_params.effective_recurrent_to_input_scale_a, + &buffer_shift_output); + evaluation_params.effective_recurrent_to_input_scale_b = buffer_shift_output; + // Set effective bias + evaluation_params.input_to_input_effective_bias = const_cast( + quantized_node_contents.InputGateData().activation_zp_folded_bias); + evaluation_params.recurrent_to_input_effective_bias = const_cast( + quantized_node_contents.InputGateData().recurrent_zp_folded_bias); + + // cell gate + effective_scale = quantization_settings.input.scale * + quantization_settings.cell_gate.activation_weight.scale / + quantization_settings.nonlinear_activation_input_scale; + QuantizeMultiplier(effective_scale, + &evaluation_params.effective_input_to_cell_scale_a, + &buffer_shift_output); + evaluation_params.effective_input_to_cell_scale_b = buffer_shift_output; + effective_scale = quantization_settings.output.scale * + quantization_settings.cell_gate.recurrent_weight.scale / + quantization_settings.nonlinear_activation_input_scale; + QuantizeMultiplier(effective_scale, + &evaluation_params.effective_recurrent_to_cell_scale_a, + &buffer_shift_output); + evaluation_params.effective_recurrent_to_cell_scale_b = buffer_shift_output; + // Set effective bias + evaluation_params.input_to_cell_effective_bias = const_cast( + quantized_node_contents.CellGateData().activation_zp_folded_bias); + evaluation_params.recurrent_to_cell_effective_bias = const_cast( + quantized_node_contents.CellGateData().recurrent_zp_folded_bias); + + // output gate 
+ effective_scale = quantization_settings.input.scale * + quantization_settings.output_gate.activation_weight.scale / + quantization_settings.nonlinear_activation_input_scale; + QuantizeMultiplier(effective_scale, + &evaluation_params.effective_input_to_output_scale_a, + &buffer_shift_output); + evaluation_params.effective_input_to_output_scale_b = buffer_shift_output; + effective_scale = quantization_settings.output.scale * + quantization_settings.output_gate.recurrent_weight.scale / + quantization_settings.nonlinear_activation_input_scale; + QuantizeMultiplier(effective_scale, + &evaluation_params.effective_recurrent_to_output_scale_a, + &buffer_shift_output); + evaluation_params.effective_recurrent_to_output_scale_b = buffer_shift_output; + // Set effective bias + evaluation_params.input_to_output_effective_bias = const_cast( + quantized_node_contents.OutputGateData().activation_zp_folded_bias); + evaluation_params.recurrent_to_output_effective_bias = const_cast( + quantized_node_contents.OutputGateData().recurrent_zp_folded_bias); + + // hidden state (no projection, output is the hidden state) + effective_scale = quantization_settings.nonlinear_activation_output_scale * + quantization_settings.nonlinear_activation_output_scale / + quantization_settings.hidden_state.scale; + QuantizeMultiplier(effective_scale, + &evaluation_params.effective_hidden_scale_a, + &buffer_shift_output); + evaluation_params.effective_hidden_scale_b = buffer_shift_output; + evaluation_params.hidden_zp = quantization_settings.hidden_state.zero_point; + + // cell state. Note, cell_scale is actually not a scale. 2^-cell_scale is + // the true scale for cell + int buffer_cell_scale; + tflite::CheckedLog2(quantization_settings.cell_state.scale, + &buffer_cell_scale); + evaluation_params.cell_scale = buffer_cell_scale; + + evaluation_params.quantized_cell_clip = static_cast(std::min( + std::max( + static_cast(quantized_node_contents.BuiltinData().cell_clip) / + quantization_settings.cell_state.scale, + -32768.0), + 32767.0)); + return evaluation_params; +} + +/*TEST HELPER FUNCTIONS*/ +template +void ValidateResultGoldens(const T* golden, const T* output_data, + const int output_len, const float tolerance) { + for (int i = 0; i < output_len; ++i) { + TF_LITE_MICRO_EXPECT_NEAR(golden[i], output_data[i], tolerance); + } +} + +template +void TestGateOutputFloat( + const GateData& gate_params, + const TfLiteFusedActivation activation_type, const float* input_data, + const float* hidden_state, const float* expected_vals, + const float tolerance) { + float gate_output[batch_size * state_dimension] = {}; + tflite::lstm_internal::CalculateLstmGateFloat( + input_data, gate_params.activation_weight, + /*aux_input=*/nullptr, /*aux_input_to_gate_weights*/ nullptr, + hidden_state, gate_params.recurrent_weight, + /*cell_state=*/nullptr, /*cell_to_gate_weights=*/nullptr, + /*layer_norm_coefficients=*/nullptr, gate_params.fused_bias, batch_size, + input_dimension, input_dimension, state_dimension, state_dimension, + /*activation=*/activation_type, gate_output, + /*is_input_all_zeros=*/false, + /*is_aux_input_all_zeros=*/true); + ValidateResultGoldens(expected_vals, gate_output, + batch_size * state_dimension, tolerance); +} + +template +void TestGateOutputQuantized( + const ActivationType* quantized_input, + const ActivationType* quantized_hidden_state, + const GateData& + gate_params, + const NodeQuantizationParameters& quantization_settings, + int32_t effective_input_to_gate_scale_a, + int32_t effective_input_to_gate_scale_b, + 
int32_t effective_recurrent_to_gate_scale_a, + int32_t effective_recurrent_to_gate_scale_b, + TfLiteFusedActivation nonlinear_type, const float* expected_vals, + float tolerance) { + CellType gate_output[batch_size * state_dimension] = {}; + BiasType scratch_buffer[batch_size * state_dimension] = {}; + + tflite::lstm_internal::CalculateLstmGateInteger8x8_16( + // Input and weights + quantized_input, gate_params.activation_weight, + gate_params.activation_zp_folded_bias, effective_input_to_gate_scale_a, + effective_input_to_gate_scale_b, + // Output state and weights + quantized_hidden_state, gate_params.activation_weight, + gate_params.recurrent_zp_folded_bias, effective_recurrent_to_gate_scale_a, + effective_recurrent_to_gate_scale_b, + // Cell state and weights + nullptr, nullptr, 0, 0, + // Layer normalization parameters (layer norm LSTM) + nullptr, nullptr, 0, 0, 0, + // Array sizes + batch_size, input_dimension, state_dimension, state_dimension, + nonlinear_type, + // Output + gate_output, + // Parameters for performance optimizations + // Scratch arrays + scratch_buffer); + + float gate_output_float[batch_size * state_dimension] = {}; + Dequantize(gate_output, batch_size * state_dimension, + quantization_settings.nonlinear_activation_output_scale, 0, + gate_output_float); + + ValidateResultGoldens(expected_vals, gate_output_float, + batch_size * state_dimension, tolerance); +} + +template +void TestCellUpdateFloat( + const GateOutputCheckData& gate_output_data, + const float cell_clip, const float tolerance) { + // copy the data since it will be updated + float cell_state[batch_size * state_dimension] = {}; + std::memcpy(cell_state, gate_output_data.cell_state, + batch_size * state_dimension * sizeof(float)); + + float forget_gate[batch_size * state_dimension] = {}; + std::memcpy(forget_gate, gate_output_data.expected_forget_gate_output, + batch_size * state_dimension * sizeof(float)); + + tflite::lstm_internal::UpdateLstmCellFloat( + batch_size, state_dimension, cell_state, + gate_output_data.expected_input_gate_output, forget_gate, + gate_output_data.expected_cell_gate_output, + /*use_cifg=*/false, cell_clip); + + ValidateResultGoldens(gate_output_data.expected_updated_cell, cell_state, + batch_size * state_dimension, tolerance); +} + +template +void TestCellUpdateQuantized( + const GateOutputCheckData& gate_output_data, + const NodeQuantizationParameters& quantization_settings, + const int32_t cell_scale_shift, const CellType quantized_cell_clip, + const float tolerance) { + CellType quantized_cell_state[batch_size * state_dimension] = {}; + tflite::Quantize(gate_output_data.cell_state, quantized_cell_state, + batch_size * state_dimension, + quantization_settings.cell_state.scale, + quantization_settings.cell_state.zero_point); + + CellType quantized_forget_gate[batch_size * state_dimension] = {}; + tflite::Quantize(gate_output_data.expected_forget_gate_output, + quantized_forget_gate, batch_size * state_dimension, + quantization_settings.nonlinear_activation_output_scale, 0); + + CellType quantized_input_gate[batch_size * state_dimension] = {}; + tflite::Quantize(gate_output_data.expected_input_gate_output, + quantized_input_gate, batch_size * state_dimension, + quantization_settings.nonlinear_activation_output_scale, 0); + + CellType quantized_cell_gate[batch_size * state_dimension] = {}; + tflite::Quantize(gate_output_data.expected_cell_gate_output, + quantized_cell_gate, batch_size * state_dimension, + quantization_settings.nonlinear_activation_output_scale, 0); + + 
tflite::lstm_internal::UpdateLstmCellInteger( + batch_size, state_dimension, quantized_cell_state, cell_scale_shift, + quantized_input_gate, quantized_forget_gate, quantized_cell_gate, false, + quantized_cell_clip); + + float cell_state_float[batch_size * state_dimension] = {}; + Dequantize(quantized_cell_state, batch_size * state_dimension, + quantization_settings.cell_state.scale, + quantization_settings.cell_state.zero_point, cell_state_float); + + ValidateResultGoldens(gate_output_data.expected_updated_cell, + cell_state_float, batch_size * state_dimension, + tolerance); +} + +template +void TestHiddenStateUpdateFloat( + const GateOutputCheckData& gate_output_data, + const float tolerance) { + // If no projection layer, hidden state dimension == output dimension == + // cell state dimension + float output[batch_size * state_dimension] = {}; + float scratch[batch_size * state_dimension] = {}; + + tflite::lstm_internal::CalculateLstmOutputFloat( + batch_size, state_dimension, state_dimension, + gate_output_data.expected_updated_cell, + gate_output_data.expected_output_gate_output, kTfLiteActTanh, nullptr, + nullptr, 0, output, scratch); + + ValidateResultGoldens(gate_output_data.expected_updated_hidden, output, + batch_size * state_dimension, tolerance); +} + +template +void TestHiddenStateUpdateQuantized( + const GateOutputCheckData& gate_output_data, + const NodeQuantizationParameters& quantization_settings, + const IntegerLstmParameter& evaluation_params, const float tolerance) { + CellType quantized_cell_state[batch_size * state_dimension] = {}; + tflite::Quantize(gate_output_data.expected_updated_cell, quantized_cell_state, + batch_size * state_dimension, + quantization_settings.cell_state.scale, + quantization_settings.cell_state.zero_point); + + CellType quantized_output_gate[batch_size * state_dimension] = {}; + tflite::Quantize(gate_output_data.expected_output_gate_output, + quantized_output_gate, batch_size * state_dimension, + quantization_settings.nonlinear_activation_output_scale, 0); + + // scratches + int16_t scratch0[batch_size * state_dimension] = {}; + int8_t scratch1[batch_size * state_dimension] = {}; + int32_t scratch2[batch_size * state_dimension] = {}; + + // output (updated hidden state) + int8_t output_state[batch_size * state_dimension] = {}; + + tflite::lstm_internal::CalculateLstmOutputInteger8x8_16( + batch_size, state_dimension, state_dimension, quantized_cell_state, + evaluation_params.cell_scale, quantized_output_gate, + evaluation_params.effective_hidden_scale_a, + evaluation_params.effective_hidden_scale_b, evaluation_params.hidden_zp, + /*projection_weights=*/nullptr, /*proj_scale_a=*/0, 0, 0, + /*output_state_zp=*/evaluation_params.hidden_zp, + evaluation_params.quantized_proj_clip, output_state, scratch0, scratch1, + scratch2); + + float output_state_float[batch_size * state_dimension] = {}; + Dequantize(output_state, batch_size * state_dimension, + quantization_settings.hidden_state.scale, + quantization_settings.hidden_state.zero_point, output_state_float); + + ValidateResultGoldens(gate_output_data.expected_updated_hidden, + output_state_float, batch_size * state_dimension, + tolerance); +} + +template +void TestOneStepLSTMFloat( + const TfLiteLSTMParams& general_model_settings, + /*can not be const, state will be updated*/ + LstmNodeContents& node_contents, + const GateOutputCheckData& gate_output_data, + const float tolerance) { + // scratch buffers + float forget_gate_scratch[batch_size * state_dimension] = {}; + float 
input_gate_scratch[batch_size * state_dimension] = {}; + float cell_gate_scratch[batch_size * state_dimension] = {}; + float output_gate_scratch[batch_size * state_dimension] = {}; + + // states and output will be modified (cannot use the const getter) + float* hidden_state = node_contents.GetHiddenStateData(); + float* cell_state = node_contents.GetCellStateData(); + float* output = node_contents.GetOutputData(); + + tflite::lstm_internal::LstmStepFloat( + gate_output_data.input_data, + node_contents.InputGateData().activation_weight, + node_contents.ForgetGateData().activation_weight, + node_contents.CellGateData().activation_weight, + node_contents.OutputGateData().activation_weight, + /*aux_input_ptr=*/nullptr, /*aux_input_to_input_weights_ptr=*/nullptr, + /*aux_input_to_forget_weights_ptr=*/nullptr, + /*aux_input_to_cell_weights_ptr=*/nullptr, + /*aux_input_to_output_weights_ptr=*/nullptr, + node_contents.InputGateData().recurrent_weight, + node_contents.ForgetGateData().recurrent_weight, + node_contents.CellGateData().recurrent_weight, + node_contents.OutputGateData().recurrent_weight, + /*cell_to_input_weights_ptr=*/nullptr, + /*cell_to_forget_weights_ptr=*/nullptr, + /*cell_to_output_weights_ptr=*/nullptr, + /*input_layer_norm_coefficients_ptr=*/nullptr, + /*forget_layer_norm_coefficients_ptr=*/nullptr, + /*cell_layer_norm_coefficients_ptr=*/nullptr, + /*output_layer_norm_coefficients_ptr=*/nullptr, + node_contents.InputGateData().fused_bias, + node_contents.ForgetGateData().fused_bias, + node_contents.CellGateData().fused_bias, + node_contents.OutputGateData().fused_bias, + /*projection_weights_ptr=*/nullptr, /*projection_bias_ptr=*/nullptr, + &general_model_settings, batch_size, state_dimension, input_dimension, + input_dimension, state_dimension, + /*output_batch_leading_dim=*/0, hidden_state, cell_state, + input_gate_scratch, forget_gate_scratch, cell_gate_scratch, + output_gate_scratch, output); + + ValidateResultGoldens(gate_output_data.expected_updated_hidden, hidden_state, + batch_size * state_dimension, tolerance); + ValidateResultGoldens(gate_output_data.expected_updated_cell, cell_state, + batch_size * state_dimension, tolerance); +} + +template +void TestOneStepLSTMQuantized( + /*can not be const, state will be updated*/ + LstmNodeContents& + model_contents, + const GateOutputCheckData& gate_output_data, + const float hidden_state_tolerance, const float cell_state_tolerance) { + // Scratch buffers + CellType scratch0[batch_size * state_dimension] = {}; + CellType scratch1[batch_size * state_dimension] = {}; + CellType scratch2[batch_size * state_dimension] = {}; + CellType scratch3[batch_size * state_dimension] = {}; + ActivationType scratch4[batch_size * state_dimension] = {}; + BiasType scratch5[batch_size * state_dimension] = {}; + + // states and output will be modified (cannot use the const getter) + ActivationType* hidden_state = model_contents.GetHiddenStateData(); + CellType* cell_state = model_contents.GetCellStateData(); + ActivationType* output = model_contents.GetOutputData(); + + const auto evaluation_params = + tflite::testing::CreateIntegerParameter(model_contents); + const auto quantization_settings = model_contents.QuantizationSettings(); + + tflite::lstm_internal::LstmStepInteger8x8_16( + model_contents.GetInputData(), + model_contents.InputGateData().activation_weight, + evaluation_params.effective_input_to_input_scale_a, + evaluation_params.effective_input_to_input_scale_b, + model_contents.ForgetGateData().activation_weight, + 
evaluation_params.effective_input_to_forget_scale_a, + evaluation_params.effective_input_to_forget_scale_b, + model_contents.CellGateData().activation_weight, + evaluation_params.effective_input_to_cell_scale_a, + evaluation_params.effective_input_to_cell_scale_b, + model_contents.OutputGateData().activation_weight, + evaluation_params.effective_input_to_output_scale_a, + evaluation_params.effective_input_to_output_scale_b, + model_contents.InputGateData().recurrent_weight, + evaluation_params.effective_recurrent_to_input_scale_a, + evaluation_params.effective_recurrent_to_input_scale_b, + model_contents.ForgetGateData().recurrent_weight, + evaluation_params.effective_recurrent_to_forget_scale_a, + evaluation_params.effective_recurrent_to_forget_scale_b, + model_contents.CellGateData().recurrent_weight, + evaluation_params.effective_recurrent_to_cell_scale_a, + evaluation_params.effective_recurrent_to_cell_scale_b, + model_contents.OutputGateData().recurrent_weight, + evaluation_params.effective_recurrent_to_output_scale_a, + evaluation_params.effective_recurrent_to_output_scale_b, + /*cell_to_input_weight_ptr=*/nullptr, + /*effective_cell_to_input_scale_a=*/0, + /*effective_cell_to_input_scale_b=*/0, + /*cell_to_forget_weight_ptr=*/nullptr, + /*effective_cell_to_forget_scale_a=*/0, + /*effective_cell_to_forget_scale_b=*/0, + /*cell_to_output_weight_ptr=*/nullptr, + /*effective_cell_to_output_scale_a=*/0, + /*effective_cell_to_output_scale_b=*/0, + /*projection_weight_ptr=*/nullptr, /*effective_proj_scale_a=*/0, + /*effective_proj_scale_b=*/0, evaluation_params.hidden_zp, + evaluation_params.effective_hidden_scale_a, + evaluation_params.effective_hidden_scale_b, + /*layer_norm_input_weight_ptr=*/nullptr, + /*layer_norm_input_scale_a=*/0, /*layer_norm_input_scale_b=*/0, + /*layer_norm_forget_weight_ptr=*/nullptr, + /*layer_norm_forget_scale_a=*/0, /*layer_norm_forget_scale_b=*/0, + /*layer_norm_cell_weight_ptr=*/nullptr, + /*layer_norm_cell_scale_a=*/0, /*layer_norm_cell_scale_b=*/0, + /*layer_norm_output_weight_ptr=*/nullptr, + /*layer_norm_output_scale_a=*/0, /*layer_norm_output_scale_b=*/0, + /*input_gate_bias_ptr=*/nullptr, /*forget_gate_bias_ptr=*/nullptr, + /*cell_gate_bias_ptr=*/nullptr, /*output_gate_bias_ptr=*/nullptr, + evaluation_params.quantized_cell_clip, + evaluation_params.quantized_proj_clip, evaluation_params.cell_scale, + /*input_variance_guard=*/0, /*forget_variance_guard=*/0, + /*cell_variance_guard=*/0, /*output_variance_guard=*/0, + evaluation_params.input_to_forget_effective_bias, + evaluation_params.recurrent_to_forget_effective_bias, + evaluation_params.input_to_cell_effective_bias, + evaluation_params.recurrent_to_cell_effective_bias, + evaluation_params.input_to_output_effective_bias, + evaluation_params.recurrent_to_output_effective_bias, + evaluation_params.input_to_input_effective_bias, + evaluation_params.recurrent_to_input_effective_bias, + evaluation_params.projection_effective_bias, batch_size, input_dimension, + state_dimension, state_dimension, hidden_state, + quantization_settings.output.zero_point, cell_state, output, scratch0, + scratch1, scratch2, scratch3, scratch4, scratch5); + + float dequantized_hidden_state[batch_size * state_dimension] = {}; + Dequantize(hidden_state, batch_size * state_dimension, + quantization_settings.hidden_state.scale, + quantization_settings.hidden_state.zero_point, + dequantized_hidden_state); + + float dequantized_cell_state[batch_size * state_dimension] = {}; + Dequantize(cell_state, batch_size * state_dimension, + 
quantization_settings.cell_state.scale, + quantization_settings.cell_state.zero_point, + dequantized_cell_state); + + ValidateResultGoldens(gate_output_data.expected_updated_hidden, + dequantized_hidden_state, batch_size * state_dimension, + hidden_state_tolerance); + ValidateResultGoldens(gate_output_data.expected_updated_cell, + dequantized_cell_state, batch_size * state_dimension, + cell_state_tolerance); +} + +template +void TestLSTMEvalFloat( + /*can not be const, state will be updated*/ + LstmNodeContents& float_model_contents, + const LstmEvalCheckData< + batch_size * time_steps * input_dimension, batch_size * state_dimension, + batch_size * state_dimension * time_steps>& eval_check_data, + const float tolerance) { + float scratch_buffers[4 * batch_size * state_dimension] = {}; + auto general_model_settings = float_model_contents.BuiltinData(); + tflite::EvalFloatLstm( + float_model_contents.GetEvalTensor(kLstmInputTensor), + float_model_contents.GetEvalTensor(kLstmInputToInputWeightsTensor), + float_model_contents.GetEvalTensor(kLstmInputToForgetWeightsTensor), + float_model_contents.GetEvalTensor(kLstmInputToCellWeightsTensor), + float_model_contents.GetEvalTensor(kLstmInputToOutputWeightsTensor), + float_model_contents.GetEvalTensor(kLstmRecurrentToInputWeightsTensor), + float_model_contents.GetEvalTensor(kLstmRecurrentToForgetWeightsTensor), + float_model_contents.GetEvalTensor(kLstmRecurrentToCellWeightsTensor), + float_model_contents.GetEvalTensor(kLstmRecurrentToOutputWeightsTensor), + /*cell_to_input_weights=*/nullptr, + /*cell_to_forget_weights=*/nullptr, + /*cell_to_output_weights=*/nullptr, + /*input_layer_norm_coefficients=*/nullptr, + /*forget_layer_norm_coefficients=*/nullptr, + /*cell_layer_norm_coefficients=*/nullptr, + /*output_layer_norm_coefficients=*/nullptr, + /*aux_input=*/nullptr, + /*aux_input_to_input_weights=*/nullptr, + /*aux_input_to_forget_weights=*/nullptr, + /*aux_input_to_cell_weights=*/nullptr, + /*aux_input_to_output_weights=*/nullptr, + float_model_contents.GetEvalTensor(kLstmInputGateBiasTensor), + float_model_contents.GetEvalTensor(kLstmForgetGateBiasTensor), + float_model_contents.GetEvalTensor(kLstmCellGateBiasTensor), + float_model_contents.GetEvalTensor(kLstmOutputGateBiasTensor), + /*projection_weights=*/nullptr, + /*projection_bias=*/nullptr, &general_model_settings, + /*forward_sequence=*/true, /*time_major=*/false, + /*output_offset=*/0, scratch_buffers, + float_model_contents.HiddenStateEvalTensor(), + float_model_contents.CellStateEvalTensor(), + float_model_contents.OutputEvalTensor()); + + // Validate hidden state. See previous test for the calculation + ValidateResultGoldens(eval_check_data.expected_hidden_state, + float_model_contents.GetHiddenStateData(), + batch_size * state_dimension, tolerance); + // Validate cell state. See previous test for the calculation + ValidateResultGoldens(eval_check_data.expected_cell_state, + float_model_contents.GetCellStateData(), + batch_size * state_dimension, tolerance); + // Validate output . 
See previous test for the calculation + ValidateResultGoldens(eval_check_data.expected_output, + float_model_contents.GetOutputData(), + batch_size * state_dimension * time_steps, tolerance); +} + +template +void TestLSTMEvalQuantized( + /*can not be const, state will be updated*/ + LstmNodeContents& + quantized_model_content, + const LstmEvalCheckData< + batch_size * time_steps * input_dimension, batch_size * state_dimension, + batch_size * state_dimension * time_steps>& eval_check_data, + const float hidden_state_tolerance, const float cell_state_tolerance) { + // Scratch buffers + CellType scratch0[batch_size * state_dimension] = {}; + CellType scratch1[batch_size * state_dimension] = {}; + CellType scratch2[batch_size * state_dimension] = {}; + CellType scratch3[batch_size * state_dimension] = {}; + ActivationType scratch4[batch_size * state_dimension * time_steps] = {}; + BiasType scratch5[batch_size * state_dimension] = {}; + + const auto quantization_settings = + quantized_model_content.QuantizationSettings(); + const auto evaluation_params = + tflite::testing::CreateIntegerParameter(quantized_model_content); + const auto general_model_settings = quantized_model_content.BuiltinData(); + + EvalInteger8x8_16Lstm( + quantized_model_content.GetEvalTensor(kLstmInputTensor), + quantized_model_content.GetEvalTensor(kLstmInputToInputWeightsTensor), + quantized_model_content.GetEvalTensor(kLstmInputToForgetWeightsTensor), + quantized_model_content.GetEvalTensor(kLstmInputToCellWeightsTensor), + quantized_model_content.GetEvalTensor(kLstmInputToOutputWeightsTensor), + quantized_model_content.GetEvalTensor(kLstmRecurrentToInputWeightsTensor), + quantized_model_content.GetEvalTensor( + kLstmRecurrentToForgetWeightsTensor), + quantized_model_content.GetEvalTensor(kLstmRecurrentToCellWeightsTensor), + quantized_model_content.GetEvalTensor( + kLstmRecurrentToOutputWeightsTensor), + /*cell_to_input_weights=*/nullptr, + /*cell_to_forget_weights=*/nullptr, + /*cell_to_output_weights=*/nullptr, + /*input_layer_norm_coefficients=*/nullptr, + /*forget_layer_norm_coefficients=*/nullptr, + /*cell_layer_norm_coefficients=*/nullptr, + /*output_layer_norm_coefficients=*/nullptr, + quantized_model_content.GetEvalTensor(kLstmInputGateBiasTensor), + quantized_model_content.GetEvalTensor(kLstmForgetGateBiasTensor), + quantized_model_content.GetEvalTensor(kLstmCellGateBiasTensor), + quantized_model_content.GetEvalTensor(kLstmOutputGateBiasTensor), + /*projection_weights=*/nullptr, + /*projection_bias=*/nullptr, &general_model_settings, + /*forward_sequence=*/true, /*time_major=*/false, &evaluation_params, + quantization_settings.output.zero_point, + quantized_model_content.HiddenStateEvalTensor(), + quantized_model_content.CellStateEvalTensor(), + quantized_model_content.OutputEvalTensor(), scratch0, scratch1, scratch2, + scratch3, scratch4, scratch5); + + float dequantized_hidden_state[batch_size * state_dimension] = {}; + Dequantize( + quantized_model_content.GetHiddenStateData(), + batch_size * state_dimension, quantization_settings.hidden_state.scale, + quantization_settings.hidden_state.zero_point, dequantized_hidden_state); + + ValidateResultGoldens(eval_check_data.expected_hidden_state, + dequantized_hidden_state, batch_size * state_dimension, + hidden_state_tolerance); + + float dequantized_cell_state[batch_size * state_dimension] = {}; + Dequantize( + quantized_model_content.GetCellStateData(), batch_size * state_dimension, + quantization_settings.cell_state.scale, + 
quantization_settings.cell_state.zero_point, dequantized_cell_state); + ValidateResultGoldens(eval_check_data.expected_cell_state, + dequantized_cell_state, batch_size * state_dimension, + cell_state_tolerance); + + float dequantized_output[batch_size * state_dimension * time_steps] = {}; + Dequantize(quantized_model_content.GetOutputData(), + batch_size * state_dimension * time_steps, + quantization_settings.output.scale, + quantization_settings.output.zero_point, dequantized_output); + ValidateResultGoldens(eval_check_data.expected_output, dequantized_output, + batch_size * state_dimension, hidden_state_tolerance); +} + +} // namespace testing +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_TEST_COMMOM_H_ diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/micro_ops.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/micro_ops.h index df2a8d2c3..0f9df42e7 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/micro_ops.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/micro_ops.h @@ -70,6 +70,7 @@ TfLiteRegistration Register_MIRROR_PAD(); TfLiteRegistration Register_NEG(); TfLiteRegistration Register_PRELU(); TfLiteRegistration Register_MUL(); +TfLiteRegistration Register_PACK(); TfLiteRegistration Register_PAD(); TfLiteRegistration Register_PADV2(); TfLiteRegistration Register_QUANTIZE(); @@ -113,7 +114,6 @@ TfLiteRegistration Register_LOGICAL_NOT(); TfLiteRegistration Register_MAXIMUM(); TfLiteRegistration Register_MINIMUM(); TfLiteRegistration Register_NOT_EQUAL(); -TfLiteRegistration Register_PACK(); TfLiteRegistration Register_RESHAPE(); TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR(); TfLiteRegistration Register_ROUND(); diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pack.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pack.cc index 5e322b87b..5a4eb4f53 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pack.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pack.cc @@ -1,4 +1,4 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -20,9 +20,7 @@ limitations under the License. #include "tensorflow/lite/micro/micro_log.h" namespace tflite { -namespace ops { -namespace micro { -namespace pack { + namespace { constexpr int kOutputTensor = 0; @@ -106,12 +104,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } } // namespace -} // namespace pack TfLiteRegistration Register_PACK() { - return tflite::micro::RegisterOp(nullptr, nullptr, pack::Eval); + return tflite::micro::RegisterOp(nullptr, nullptr, Eval); } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.cc index d9b147ad8..050913c5a 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.cc @@ -1,4 +1,4 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
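
The pack.cc change above flattens the old tflite::ops::micro::pack namespace and exposes Register_PACK() directly from tflite, registered through tflite::micro::RegisterOp(nullptr, nullptr, Eval). A minimal caller-side sketch of what that looks like to an application, assuming the existing MicroMutableOpResolver API (the op count and helper name below are illustrative, not part of this patch):

    // Sketch only: resolving PACK through the flattened namespace.
    #include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

    // One op slot is enough for this illustration; real applications size the
    // resolver for every operator in their model.
    using PackOnlyResolver = tflite::MicroMutableOpResolver<1>;

    void AddPackKernel(PackOnlyResolver& resolver) {
      // AddPack() binds BuiltinOperator_PACK to tflite::Register_PACK(),
      // which now lives in the top-level tflite namespace rather than
      // tflite::ops::micro.
      resolver.AddPack();
    }
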
@@ -43,7 +43,12 @@ TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
       AveragePoolingEvalFloat(context, node, params, data, input, output);
       break;
     case kTfLiteInt8:
-      AveragePoolingEvalQuantized(context, node, params, data, input, output);
+      AveragePoolingEvalQuantized<int8_t>(context, node, params, data, input,
+                                          output);
+      break;
+    case kTfLiteInt16:
+      AveragePoolingEvalQuantized<int16_t>(context, node, params, data, input,
+                                           output);
       break;
     default:
       MicroPrintf("Input type %s is not currently supported",
@@ -71,7 +76,12 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
       MaxPoolingEvalFloat(context, node, params, data, input, output);
       break;
     case kTfLiteInt8:
-      MaxPoolingEvalQuantized(context, node, params, data, input, output);
+      MaxPoolingEvalQuantized<int8_t>(context, node, params, data, input,
+                                      output);
+      break;
+    case kTfLiteInt16:
+      MaxPoolingEvalQuantized<int16_t>(context, node, params, data, input,
+                                       output);
       break;
     default:
       MicroPrintf("Type %s not currently supported.",
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.h
index 493250ee1..7b322480c 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.h
+++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling.h
@@ -20,7 +20,14 @@ limitations under the License.
 
 #include "tensorflow/lite/c/builtin_op_data.h"
 #include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h"
+#include "tensorflow/lite/kernels/internal/reference/pooling.h"
+#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/kernels/padding.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"
 #include "tensorflow/lite/micro/kernels/micro_ops.h"
+#include "tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 
@@ -50,27 +57,69 @@ void AveragePoolingEvalFloat(const TfLiteContext* context,
                              const TfLiteEvalTensor* input,
                              TfLiteEvalTensor* output);
 
+template <typename T>
 void AveragePoolingEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
                                  const TfLitePoolParams* params,
                                  const OpDataPooling* data,
                                  const TfLiteEvalTensor* input,
-                                 TfLiteEvalTensor* output);
+                                 TfLiteEvalTensor* output) {
+  TFLITE_DCHECK(input->type == kTfLiteInt8 || input->type == kTfLiteInt16);
+
+  PoolParams op_params;
+  op_params.stride_height = params->stride_height;
+  op_params.stride_width = params->stride_width;
+  op_params.filter_height = params->filter_height;
+  op_params.filter_width = params->filter_width;
+  op_params.padding_values.height = data->padding.height;
+  op_params.padding_values.width = data->padding.width;
+  op_params.quantized_activation_min = data->activation_min;
+  op_params.quantized_activation_max = data->activation_max;
+
+  reference_integer_ops::AveragePool(op_params,
+                                     tflite::micro::GetTensorShape(input),
+                                     tflite::micro::GetTensorData<T>(input),
+                                     tflite::micro::GetTensorShape(output),
+                                     tflite::micro::GetTensorData<T>(output));
+}
 
 void MaxPoolingEvalFloat(TfLiteContext* context, TfLiteNode* node,
                          TfLitePoolParams* params, const OpDataPooling* data,
                          const TfLiteEvalTensor* input,
                          TfLiteEvalTensor* output);
 
+template <typename T>
 void MaxPoolingEvalQuantized(TfLiteContext* context, TfLiteNode* node,
                              TfLitePoolParams* params,
                              const OpDataPooling* data,
                              const TfLiteEvalTensor* input,
-                             TfLiteEvalTensor* output);
+                             TfLiteEvalTensor* output) {
+  TFLITE_DCHECK(input->type == kTfLiteInt8 || input->type == kTfLiteInt16);
+
+  tflite::PoolParams op_params;
+  op_params.stride_height = params->stride_height;
+  op_params.stride_width = params->stride_width;
+  op_params.filter_height = params->filter_height;
+  op_params.filter_width = params->filter_width;
+  op_params.padding_values.height = data->padding.height;
+  op_params.padding_values.width = data->padding.width;
+  op_params.quantized_activation_min = data->activation_min;
+  op_params.quantized_activation_max = data->activation_max;
+
+  reference_integer_ops::MaxPool(op_params,
+                                 tflite::micro::GetTensorShape(input),
+                                 tflite::micro::GetTensorData<T>(input),
+                                 tflite::micro::GetTensorShape(output),
+                                 tflite::micro::GetTensorData<T>(output));
+}
 
 #if defined(CMSIS_NN)
 TfLiteRegistration Register_AVERAGE_POOL_2D_INT8();
 TfLiteRegistration Register_MAX_POOL_2D_INT8();
+
+TfLiteRegistration Register_AVERAGE_POOL_2D_INT16();
+
+TfLiteRegistration Register_MAX_POOL_2D_INT16();
 #else
 inline TfLiteRegistration Register_AVERAGE_POOL_2D_INT8() {
   return tflite::Register_AVERAGE_POOL_2D();
@@ -79,6 +128,14 @@ inline TfLiteRegistration Register_AVERAGE_POOL_2D_INT8() {
   return tflite::Register_AVERAGE_POOL_2D();
 }
 
 inline TfLiteRegistration Register_MAX_POOL_2D_INT8() {
   return tflite::Register_MAX_POOL_2D();
 }
+
+inline TfLiteRegistration Register_AVERAGE_POOL_2D_INT16() {
+  return tflite::Register_AVERAGE_POOL_2D();
+}
+
+inline TfLiteRegistration Register_MAX_POOL_2D_INT16() {
+  return tflite::Register_MAX_POOL_2D();
+}
 #endif
 
 }  // namespace tflite
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling_common.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling_common.cc
index ddc18f0bb..b39e9d846 100644
--- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling_common.cc
+++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/pooling_common.cc
@@ -1,4 +1,4 @@
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
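
The pooling registration hooks above follow the existing INT8 pattern: with CMSIS_NN defined they resolve to specialized kernel registrations, and otherwise the new Register_*_POOL_2D_INT16() helpers simply forward to the generic portable kernels. A rough usage sketch, assuming the MicroMutableOpResolver Add* overloads that accept an explicit TfLiteRegistration (an assumption about the existing API, not something introduced by this patch):

    // Sketch only: selecting the int16-capable pooling registrations.
    #include "tensorflow/lite/micro/kernels/pooling.h"
    #include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

    void AddInt16Pooling(tflite::MicroMutableOpResolver<2>& resolver) {
      // Without CMSIS_NN these calls are equivalent to the plain
      // AddAveragePool2D()/AddMaxPool2D() registrations shown in the header.
      resolver.AddAveragePool2D(tflite::Register_AVERAGE_POOL_2D_INT16());
      resolver.AddMaxPool2D(tflite::Register_MAX_POOL_2D_INT16());
    }
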
@@ -69,10 +69,14 @@ TfLiteStatus PoolingPrepare(TfLiteContext* context, TfLiteNode* node) { if (input->type == kTfLiteFloat32) { CalculateActivationRange(params->activation, &data->activation_min_f32, &data->activation_max_f32); - } else if (input->type == kTfLiteInt8) { + } else if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) { CalculateActivationRangeQuantized(context, params->activation, output, &data->activation_min, &data->activation_max); + } else { + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; } micro_context->DeallocateTempTfLiteTensor(input); @@ -102,30 +106,6 @@ void AveragePoolingEvalFloat(const TfLiteContext* context, tflite::micro::GetTensorData<float>(output)); } -void AveragePoolingEvalQuantized(TfLiteContext* context, const TfLiteNode* node, - const TfLitePoolParams* params, - const OpDataPooling* data, - const TfLiteEvalTensor* input, - TfLiteEvalTensor* output) { - TFLITE_DCHECK(input->type == kTfLiteInt8); - - PoolParams op_params; - op_params.stride_height = params->stride_height; - op_params.stride_width = params->stride_width; - op_params.filter_height = params->filter_height; - op_params.filter_width = params->filter_width; - op_params.padding_values.height = data->padding.height; - op_params.padding_values.width = data->padding.width; - op_params.quantized_activation_min = data->activation_min; - op_params.quantized_activation_max = data->activation_max; - - reference_integer_ops::AveragePool( - op_params, tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData<int8_t>(input), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData<int8_t>(output)); -} - void MaxPoolingEvalFloat(TfLiteContext* context, TfLiteNode* node, TfLitePoolParams* params, const OpDataPooling* data, const TfLiteEvalTensor* input, @@ -145,26 +125,4 @@ void MaxPoolingEvalFloat(TfLiteContext* context, TfLiteNode* node, tflite::micro::GetTensorData<float>(output)); } -void MaxPoolingEvalQuantized(TfLiteContext* context, TfLiteNode* node, - TfLitePoolParams* params, - const OpDataPooling* data, - const TfLiteEvalTensor* input, - TfLiteEvalTensor* output) { - tflite::PoolParams op_params; - op_params.stride_height = params->stride_height; - op_params.stride_width = params->stride_width; - op_params.filter_height = params->filter_height; - op_params.filter_width = params->filter_width; - op_params.padding_values.height = data->padding.height; - op_params.padding_values.width = data->padding.width; - op_params.quantized_activation_min = data->activation_min; - op_params.quantized_activation_max = data->activation_max; - - reference_integer_ops::MaxPool(op_params, - tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData<int8_t>(input), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData<int8_t>(output)); -} - } // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/BUILD b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/BUILD index e7187ef0d..0698846ff 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/BUILD +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/BUILD @@ -15,3 +15,16 @@ cc_library( hdrs = ["conv_test_data.h"], deps = ["//tensorflow/lite/c:common"], ) + +cc_library( + name = "lstm_test_data", + srcs = ["lstm_test_data.cc"], + hdrs = [ + "lstm_test_data.h", + ], + deps = [ + "//tensorflow/lite/c:common", + "//tensorflow/lite/micro:test_helpers", +
"//tensorflow/lite/micro/kernels:lstm_shared", + ], +) diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.cc b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.cc new file mode 100644 index 000000000..4cfe1069a --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.cc @@ -0,0 +1,252 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/micro/kernels/testdata/lstm_test_data.h" + +#include + +namespace tflite { +namespace testing { + +namespace { +// LSTM internal setting (e.g., nonlinear activation type) +constexpr TfLiteLSTMParams kDefaultBuiltinData = { + /*.activation=*/kTfLiteActTanh, + /*.cell_clip=*/6, /*.proj_clip=*/3, + /*.kernel_type=*/kTfLiteLSTMFullKernel, + /*.asymmetric_quantize_inputs=*/true}; +} // namespace + +GateOutputCheckData<4, 4> Get2X2GateOutputCheckData() { + GateOutputCheckData<4, 4> gate_data; + const float input_data[4] = { + 0.2, 0.3, // batch1 + -0.98, 0.62 // batch2 + }; + std::memcpy(gate_data.input_data, input_data, 4 * sizeof(float)); + + const float hidden_state[4] = { + -0.1, 0.2, // batch1 + -0.3, 0.5 // batch2 + }; + std::memcpy(gate_data.hidden_state, hidden_state, 4 * sizeof(float)); + + const float cell_state[4] = { + -1.3, 6.2, // batch1 + -7.3, 3.5 // batch2 + }; + std::memcpy(gate_data.cell_state, cell_state, 4 * sizeof(float)); + + // Use the forget gate parameters to test small gate outputs + // output = sigmoid(W_i*i+W_h*h+b) = sigmoid([[-10,-10],[-20,-20]][0.2, + // +[[-10,-10],[-20,-20]][-0.1, 0.2]+[1,2]) = sigmoid([-5,-10]) = + // [6.69285092e-03, 4.53978687e-05] (Batch1) + // Similarly, we have [0.93086158 0.9945137 ] for batch 2 + const float expected_forget_gate_output[4] = {6.69285092e-3f, 4.53978687e-5f, + 0.93086158, 0.9945137}; + std::memcpy(gate_data.expected_forget_gate_output, + expected_forget_gate_output, 4 * sizeof(float)); + + // Use the input gate parameters to test small gate outputs + // output = sigmoid(W_i*i+W_h*h+b) = sigmoid([[10,10],[20,20]][0.2, 0.3] + // +[[10,10],[20,20]][-0.1, 0.2]+[-1,-2]) = sigmoid([5,10]) = + // [0.99330715, 0.9999546] + // Similarly, we have [0.06913842 0.0054863 ] for batch 2 + const float expected_input_gate_output[4] = {0.99330715, 0.9999546, + 0.06913842, 0.0054863}; + std::memcpy(gate_data.expected_input_gate_output, expected_input_gate_output, + 4 * sizeof(float)); + + // Use the output gate parameters to test normnal gate outputs + // output = sigmoid(W_i*i+W_h*h+b) = sigmoid([[1,1],[1,1]][0.2, 0.3] + // +[[1,1],[1,1]][-0.1, 0.2]+[0,0]) = sigmoid([0.6,0.6]) = + // [0.6456563062257954, 0.6456563062257954] + // Similarly, we have [[0.46008512 0.46008512]] for batch 2 + const float expected_output_gate_output[4] = { + 0.6456563062257954, 0.6456563062257954, 0.46008512, 0.46008512}; + std::memcpy(gate_data.expected_output_gate_output, 
+ expected_output_gate_output, 4 * sizeof(float)); + + // Use the cell(modulation) gate parameters to tanh output + // output = tanh(W_i*i+W_h*h+b) = tanh([[1,1],[1,1]][0.2, 0.3] + // +[[1,1],[1,1]][-0.1, 0.2]+[0,0]) = tanh([0.6,0.6]) = + // [0.6456563062257954, 0.6456563062257954] + // Similarly, we have [-0.1586485 -0.1586485] for batch 2 + const float expected_cell_gate_output[4] = { + 0.5370495669980353, 0.5370495669980353, -0.1586485, -0.1586485}; + std::memcpy(gate_data.expected_cell_gate_output, expected_cell_gate_output, + 4 * sizeof(float)); + + // Cell = forget_gate*cell + input_gate*cell_gate + // Note -6.80625824 is clipped to -6 + const float expected_updated_cell[4] = {0.52475447, 0.53730665, -6, + 3.47992756}; + std::memcpy(gate_data.expected_updated_cell, expected_updated_cell, + 4 * sizeof(float)); + + // Use the updated cell state to update the hidden state + // tanh(expected_updated_cell) * expected_output_gate_output + const float expected_updated_hidden[4] = {0.31079388, 0.3169827, -0.46007947, + 0.45921249}; + std::memcpy(gate_data.expected_updated_hidden, expected_updated_hidden, + 4 * sizeof(float)); + return gate_data; +} + +// TODO(b/253466487): document how the golden values are arrived at +LstmEvalCheckData<12, 4, 12> Get2X2LstmEvalCheckData() { + LstmEvalCheckData<12, 4, 12> eval_data; + const float input_data[12] = { + 0.2, 0.3, 0.2, 0.3, 0.2, 0.3, // batch one + -0.98, 0.62, 0.01, 0.99, 0.49, -0.32 // batch two + }; + std::memcpy(eval_data.input_data, input_data, 12 * sizeof(float)); + + // Initialize hidden state as zeros + const float hidden_state[4] = {}; + std::memcpy(eval_data.hidden_state, hidden_state, 4 * sizeof(float)); + + // The expected model output after 3 time steps using the fixed input and + // parameters + const float expected_output[12] = { + 0.26455893, 0.26870455, 0.47935803, + 0.47937014, 0.58013272, 0.58013278, // batch1 + -1.41184672e-3f, -1.43329117e-5f, 0.46887168, + 0.46891281, 0.50054074, 0.50054148 // batch2 + }; + std::memcpy(eval_data.expected_output, expected_output, 12 * sizeof(float)); + + const float expected_hidden_state[4] = { + 0.58013272, 0.58013278, // batch1 + 0.50054074, 0.50054148 // batch2 + }; + std::memcpy(eval_data.expected_hidden_state, expected_hidden_state, + 4 * sizeof(float)); + + const float expected_cell_state[4] = { + 0.89740515, 0.8974053, // batch1 + 0.80327607, 0.80327785 // batch2 + }; + std::memcpy(eval_data.expected_cell_state, expected_cell_state, + 4 * sizeof(float)); + return eval_data; +} + +LstmNodeContents +Create2x3x2X2FloatNodeContents(const float* input_data, + const float* hidden_state_data, + const float* cell_state_data) { + // Parameters for different gates + // negative large weights for forget gate to make it really forget + const GateData forget_gate_data = { + /*.activation_weight=*/{-10, -10, -20, -20}, + /*.recurrent_weight=*/{-10, -10, -20, -20}, + /*.fused_bias=*/{1, 2}, + /*activation_zp_folded_bias=*/{0, 0}, + /*recurrent_zp_folded_bias=*/{0, 0}}; + // positive large weights for input gate to make it really remember + const GateData input_gate_data = { + /*.activation_weight=*/{10, 10, 20, 20}, + /*.recurrent_weight=*/{10, 10, 20, 20}, + /*.fused_bias=*/{-1, -2}, + /*activation_zp_folded_bias=*/{0, 0}, + /*recurrent_zp_folded_bias=*/{0, 0}}; + // all ones to test the behavior of tanh at normal range (-1,1) + const GateData cell_gate_data = { + /*.activation_weight=*/{1, 1, 1, 1}, + /*.recurrent_weight=*/{1, 1, 1, 1}, + /*.fused_bias=*/{0, 0}, + /*activation_zp_folded_bias=*/{0, 
0}, + /*recurrent_zp_folded_bias=*/{0, 0}}; + // all ones to test the behavior of sigmoid at normal range (-1. 1) + const GateData output_gate_data = { + /*.activation_weight=*/{1, 1, 1, 1}, + /*.recurrent_weight=*/{1, 1, 1, 1}, + /*.fused_bias=*/{0, 0}, + /*activation_zp_folded_bias=*/{0, 0}, + /*recurrent_zp_folded_bias=*/{0, 0}}; + + LstmNodeContents float_node_contents( + kDefaultBuiltinData, forget_gate_data, input_gate_data, cell_gate_data, + output_gate_data); + + if (input_data != nullptr) { + float_node_contents.SetInputData(input_data); + } + if (hidden_state_data != nullptr) { + float_node_contents.SetHiddenStateData(hidden_state_data); + } + if (cell_state_data != nullptr) { + float_node_contents.SetCellStateData(cell_state_data); + } + return float_node_contents; +} + +NodeQuantizationParameters Get2X2Int8LstmQuantizationSettings() { + NodeQuantizationParameters quantization_settings; + quantization_settings.activation_type = kTfLiteInt8; + quantization_settings.weight_type = kTfLiteInt8; + quantization_settings.cell_type = kTfLiteInt16; + quantization_settings.bias_type = kTfLiteInt32; + quantization_settings.nonlinear_activation_input_scale = + 0.00024414062; // std::pow(2.0f, -12.0f) + quantization_settings.nonlinear_activation_output_scale = + 0.00003051757; // std::pow(2.0f, -15.0f) + + // state quantization parameters + quantization_settings.input = {/*scale=*/0.00784313725490196, /*zp=*/0, + /*symmetry=*/false}; + quantization_settings.output = {/*scale=*/0.004705882165580988, /*zp=*/-21, + /*symmetry=*/false}; + quantization_settings.hidden_state = {/*scale=*/0.004705882165580988, + /*zp=*/-21, /*symmetry=*/false}; + quantization_settings.cell_state = {/*scale=*/0.00024414062, /*zp=*/0, + /*symmetry=*/true}; + + // gate quantization parameters + quantization_settings.forget_gate = { + {/*scale=*/0.15748031496062992, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.15748031496062992, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.0012351397251814111, /*zp=*/0, /*symmetry=*/true}}; + quantization_settings.input_gate = { + {/*scale=*/0.15748031496062992, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.15748031496062992, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.0012351397251814111, /*zp=*/0, /*symmetry=*/true}}; + quantization_settings.cell_gate = { + {/*scale=*/0.007874015748031496, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.007874015748031496, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/6.175698625907056e-5, /*zp=*/0, /*symmetry=*/true}}; + quantization_settings.output_gate = { + {/*scale=*/0.1, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.1, /*zp=*/0, /*symmetry=*/true}, + {/*scale=*/0.1, /*zp=*/0, /*symmetry=*/true}}; + + return quantization_settings; +} + +LstmNodeContents +Create2x3x2X2Int8NodeContents(const float* input_data, + const float* hidden_state, + const float* cell_state) { + auto float_node_content = + Create2x3x2X2FloatNodeContents(input_data, hidden_state, cell_state); + const auto quantization_settings = Get2X2Int8LstmQuantizationSettings(); + return CreateIntegerNodeContents(quantization_settings, + float_node_content); +} + +} // namespace testing +} // namespace tflite diff --git a/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.h b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.h new file mode 100644 index 000000000..538119044 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/kernels/testdata/lstm_test_data.h @@ -0,0 +1,535 @@ +/* Copyright 2022 The TensorFlow Authors. 
All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_TESTDATA_LSTM_TEST_DATA_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_TESTDATA_LSTM_TEST_DATA_H_ +#include + +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" +#include "tensorflow/lite/micro/kernels/lstm_shared.h" +#include "tensorflow/lite/micro/test_helpers.h" + +namespace tflite { +namespace testing { +// Data structure to store all the data used to check output of internal gates +// of one time step +// input_size = batch_size*input_dimension (size of the input array) +// gate_output_size = batch_size*state_dimension (size of the gate output) +template +struct GateOutputCheckData { + float input_data[input_size]; + float hidden_state[gate_output_size]; + float cell_state[gate_output_size]; + float expected_forget_gate_output[gate_output_size]; + float expected_input_gate_output[gate_output_size]; + float expected_output_gate_output[gate_output_size]; + float expected_cell_gate_output[gate_output_size]; + float expected_updated_cell[gate_output_size]; + float expected_updated_hidden[gate_output_size]; +}; + +// Data structure to store all the data used to check the output of the kernel +// of multiple batch, multiple timesteps +// input_size = batch_size*time_steps*input_dimension (size of the input array) +// gate_output_size = batch_size*state_dimension (size of the gate output) +// output_size = time_steps*gate_output_size (size of the output from the +// kernel) +template +struct LstmEvalCheckData { + float input_data[input_size]; + float hidden_state[gate_output_size]; + float expected_output[output_size]; + float expected_hidden_state[gate_output_size]; + float expected_cell_state[gate_output_size]; +}; + +// Struct that holds the weight/bias information for a standard gate (i.e. no +// modification such as layer normalization, peephole, etc.) +// Every gate is defined by the type and size of the weights (bias included) +// inside. +// Specifically, types are weight type and bias type (normally the same +// type of MatMul accumulator). +// activation_weight has shape (hidden state dimension * input tensor dimension) +// recurrent_weight has shape (hidden state dimension * hidden state dimension) +// bias has shape (hidden state dimension, 1) +template +struct GateData { + WeightType activation_weight[state_dimension * input_dimension]; + WeightType recurrent_weight[state_dimension * state_dimension]; + BiasType fused_bias[state_dimension]; + // Quantized model folded the zero point of activations into biases: + // bias + zero_point * weight. 
+ BiasType activation_zp_folded_bias[state_dimension]; + BiasType recurrent_zp_folded_bias[state_dimension]; +}; + +// A struct that holds quantization parameters for a LSTM Tensor +struct TensorQuantizationParameters { + double scale; + int zero_point; + bool symmetry; +}; + +// A struct that holds quantization parameters for an internal gate, which is +// defined by activation/recurrent weight and bias (assuming no internal layer +// normalization) +struct GateQuantizationParameters { + TensorQuantizationParameters activation_weight; + TensorQuantizationParameters recurrent_weight; + TensorQuantizationParameters bias; +}; + +// A struct that holds the quantization settings for the LSTM node. Data +// members can be grouped into five parts. +// 1. Data types (activation,weight, cell, bias) +// 2. Non-linear activation (i.e., tanh and sigmoid) fixed point +// calculation settings +// 3. Input/output tensor quantization settings +// 4. Internal state (hidden and cell) quantization settings +// 5. Internal gate (forget, input, cell, output) settings +struct NodeQuantizationParameters { + TfLiteType activation_type; + TfLiteType weight_type; + TfLiteType cell_type; + TfLiteType bias_type; + // Fixed point setting for integer nonlinear activation calculation + double nonlinear_activation_input_scale; + double nonlinear_activation_output_scale; + // Quantization parameters for input/output + TensorQuantizationParameters input; + TensorQuantizationParameters output; + // Quantization parameters for internal states + TensorQuantizationParameters hidden_state; + TensorQuantizationParameters cell_state; + // Quantization parameters for gates + GateQuantizationParameters forget_gate; + GateQuantizationParameters input_gate; + GateQuantizationParameters cell_gate; + GateQuantizationParameters output_gate; +}; + +// Data structure that holds all the information to evaluate a LSTM kernel +// (mimic the LSTM node). +// Tensor Types: +// ActivationType defines the data type of input/output of the layer. The hidden +// state has the ActivationType as well since it is the layer output of the +// previous time. +// WeightType defines the weight data type inside the internal gates. +// BiasType defines the bias data type inside the internal gates. (normally the +// same type of MatMul accumulator). +// Tensor Shapes: +// The input to the layer has shape (batch_size,time_steps,input_dimension). +// Both the hidden state and cell state has shape (state_dimension, 1) +// The output of the layer has shape (batch_size,time_steps,state_dimension) +// Note: state values can change through calls (stateful) +template +class LstmNodeContents { + public: + LstmNodeContents(const LstmNodeContents& other) = default; + LstmNodeContents& operator=(const LstmNodeContents& other) = default; + // Use the general model setting (builtin data) and the four gates data to + // construct the node content. 
Note the input, hidden state, and cell state + // data is provided later for flexible testing (initialize as zero now) + LstmNodeContents( + const TfLiteLSTMParams builtin_data, + const GateData + forget_gate_params, + const GateData + input_gate_params, + const GateData + cell_gate_params, + const GateData + output_gate_params) + : builtin_data_(builtin_data), + forget_gate_data_(forget_gate_params), + input_gate_data_(input_gate_params), + cell_gate_data_(cell_gate_params), + output_gate_data_(output_gate_params) { + InitializeTensors(); + } + + // Add quantization parameters (scale, zero point) to tensors + // Only required for the integer kernel + void AddQuantizationParameters( + const NodeQuantizationParameters& quantization_params) { + quantization_settings_ = quantization_params; + // Input Tensor + SetTensorQuantizationParam(kLstmInputTensor, quantization_params.input); + // Forget Gate Tensors + const auto& forget_gate_quant_param = quantization_params.forget_gate; + SetTensorQuantizationParam(kLstmInputToForgetWeightsTensor, + forget_gate_quant_param.activation_weight); + SetTensorQuantizationParam(kLstmRecurrentToForgetWeightsTensor, + forget_gate_quant_param.recurrent_weight); + SetTensorQuantizationParam(kLstmForgetGateBiasTensor, + forget_gate_quant_param.bias); + // Input Gate Tensors + const auto& input_gate_quant_param = quantization_params.input_gate; + SetTensorQuantizationParam(kLstmInputToInputWeightsTensor, + input_gate_quant_param.activation_weight); + SetTensorQuantizationParam(kLstmRecurrentToInputWeightsTensor, + input_gate_quant_param.recurrent_weight); + SetTensorQuantizationParam(kLstmInputGateBiasTensor, + input_gate_quant_param.bias); + // Cell Gate Tensors + const auto& cell_gate_quant_param = quantization_params.cell_gate; + SetTensorQuantizationParam(kLstmInputToCellWeightsTensor, + cell_gate_quant_param.activation_weight); + SetTensorQuantizationParam(kLstmRecurrentToCellWeightsTensor, + cell_gate_quant_param.recurrent_weight); + SetTensorQuantizationParam(kLstmCellGateBiasTensor, + cell_gate_quant_param.bias); + // Output Gate Tensors + const auto& output_gate_quant_param = quantization_params.output_gate; + SetTensorQuantizationParam(kLstmInputToOutputWeightsTensor, + output_gate_quant_param.activation_weight); + SetTensorQuantizationParam(kLstmRecurrentToOutputWeightsTensor, + output_gate_quant_param.recurrent_weight); + SetTensorQuantizationParam(kLstmOutputGateBiasTensor, + output_gate_quant_param.bias); + // State Tensors + SetTensorQuantizationParam(kLstmOutputStateTensor, + quantization_params.hidden_state); + SetTensorQuantizationParam(kLstmCellStateTensor, + quantization_params.cell_state); + // Output Tensor + SetTensorQuantizationParam(24, quantization_params.output); + } + + // Provide interface to set the input tensor values for flexible testing + void SetInputData(const ActivationType* data) { + std::memcpy( + input_, data, + batch_size * input_dimension * time_steps * sizeof(ActivationType)); + SetTensor(kLstmInputTensor, input_, input_size_); + } + const ActivationType* GetInputData() const { return input_; } + + // Provide interface to set the hidden state tensor values for flexible + // testing + void SetHiddenStateData(const ActivationType* data) { + std::memcpy(hidden_state_, data, + batch_size * state_dimension * sizeof(ActivationType)); + } + ActivationType* GetHiddenStateData() { return hidden_state_; } + + // Provide interface to set the cell state tensor values for flexible + // testing + void SetCellStateData(const CellType* 
data) { + std::memcpy(cell_state_, data, + batch_size * state_dimension * sizeof(CellType)); + } + CellType* GetCellStateData() { return cell_state_; } + ActivationType* GetOutputData() { return output_; } + + // Internal tensors, fixed (const). see lstm_shared.h for tensor names + const TfLiteEvalTensor* GetEvalTensor(const int tensor_index) const { + auto valid_index = input_tensor_indeces_[tensor_index + 1]; + if (valid_index < 0) { + return nullptr; + } + return &eval_tensors_[tensor_index]; + } + + // Variable tensors (will be changed, can not be const) + TfLiteEvalTensor* HiddenStateEvalTensor() { + return &eval_tensors_[kLstmOutputStateTensor]; + } + TfLiteEvalTensor* CellStateEvalTensor() { + return &eval_tensors_[kLstmCellStateTensor]; + } + TfLiteEvalTensor* OutputEvalTensor() { return &eval_tensors_[24]; } + + const GateData& + ForgetGateData() const { + return forget_gate_data_; + } + const GateData& + InputGateData() const { + return input_gate_data_; + } + const GateData& + CellGateData() const { + return cell_gate_data_; + } + const GateData& + OutputGateData() const { + return output_gate_data_; + } + + const TfLiteLSTMParams BuiltinData() const { return builtin_data_; } + + const NodeQuantizationParameters QuantizationSettings() const { + return quantization_settings_; + } + + private: + void InitializeTensors() { + // Invalid all the input tensors untill we set it + input_tensor_indeces_[0] = 24; // tot elements + for (size_t i = 1; i < 25; i++) { + input_tensor_indeces_[i] = kTfLiteOptionalTensor; + } + // Input Tensor + SetTensor(kLstmInputTensor, input_, input_size_); + // Forget Gate Tensors + SetTensor(kLstmInputToForgetWeightsTensor, + forget_gate_data_.activation_weight, activation_weight_size_); + SetTensor(kLstmRecurrentToForgetWeightsTensor, + forget_gate_data_.recurrent_weight, recurrent_weight_size_); + SetTensor(kLstmForgetGateBiasTensor, forget_gate_data_.fused_bias, + bias_size_); + // Input Gate Tensors + SetTensor(kLstmInputToInputWeightsTensor, + input_gate_data_.activation_weight, activation_weight_size_); + SetTensor(kLstmRecurrentToInputWeightsTensor, + input_gate_data_.recurrent_weight, recurrent_weight_size_); + SetTensor(kLstmInputGateBiasTensor, input_gate_data_.fused_bias, + bias_size_); + // Cell Gate Tensors + SetTensor(kLstmInputToCellWeightsTensor, cell_gate_data_.activation_weight, + activation_weight_size_); + SetTensor(kLstmRecurrentToCellWeightsTensor, + cell_gate_data_.recurrent_weight, recurrent_weight_size_); + SetTensor(kLstmCellGateBiasTensor, cell_gate_data_.fused_bias, bias_size_); + // Output Gate Tensors + SetTensor(kLstmInputToOutputWeightsTensor, + output_gate_data_.activation_weight, activation_weight_size_); + SetTensor(kLstmRecurrentToOutputWeightsTensor, + output_gate_data_.recurrent_weight, recurrent_weight_size_); + SetTensor(kLstmOutputGateBiasTensor, output_gate_data_.fused_bias, + bias_size_); + // State Tensors + SetTensor(kLstmOutputStateTensor, hidden_state_, state_size_); + SetTensor(kLstmCellStateTensor, cell_state_, state_size_); + // // Output Tensor + SetTensor(24, output_, output_size_); + } + + template + void SetTensor(const int index, const T* data, int* dims) { + // Lite tensors for kernel level testing + tensors_[index].data.data = const_cast(data); + tensors_[index].dims = IntArrayFromInts(dims); + tensors_[index].type = typeToTfLiteType(); + // Eval tensors for internal computation testing + eval_tensors_[index].data.data = const_cast(data); + eval_tensors_[index].dims = IntArrayFromInts(dims); + 
eval_tensors_[index].type = typeToTfLiteType(); + // update the index + input_tensor_indeces_[index + 1] = index; + } + + void SetTensorQuantizationParam( + const int index, const TensorQuantizationParameters& quant_param) { + tensors_[index].params.scale = quant_param.scale; + tensors_[index].params.zero_point = quant_param.zero_point; + } + + const TfLiteLSTMParams builtin_data_; + GateData + forget_gate_data_; + GateData + input_gate_data_; + GateData + cell_gate_data_; + GateData + output_gate_data_; + + // Keep to ease the testing process (although all quantization information can + // be obtained from individual tensors, they are well organized here and light + // weighted) + NodeQuantizationParameters quantization_settings_; + + // Not const since IntArrayFromInts takes int *; the first element of the + // array must be the size of the array + int input_size_[4] = {3, batch_size, time_steps, input_dimension}; + int output_size_[4] = {3, batch_size, time_steps, state_dimension}; + int activation_weight_size_[3] = {2, state_dimension, input_dimension}; + int recurrent_weight_size_[3] = {2, state_dimension, state_dimension}; + int bias_size_[2] = {1, state_dimension}; + int state_size_[3] = {2, batch_size, state_dimension}; + + // see lstm_shared.h for tensor names, the last tensor is the output tensor + TfLiteTensor tensors_[24 + 1]; + // Use for internel kernel testing + TfLiteEvalTensor eval_tensors_[24 + 1]; + // indices for the tensors inside the node (required by kernel runner) + int input_tensor_indeces_[1 + 24] = {}; + // single output (last in the tensors array) + int output_tensor_indeces_[2] = {1, 24}; + + // tennsor data + // states are initialized to zero + ActivationType hidden_state_[batch_size * state_dimension] = {}; + CellType cell_state_[batch_size * state_dimension] = {}; + // input is defined in the ModelContent (const across all derived models) + ActivationType input_[batch_size * input_dimension * time_steps] = {}; + ActivationType output_[batch_size * state_dimension * time_steps] = {}; +}; + +// Convert floating point gate data to the corresponding quantized version +template +GateData +CreateQuantizedGateData( + const GateData& + gate_parameters, + const TensorQuantizationParameters& input_quantization_params, + const TensorQuantizationParameters& output_quantization_params, + const GateQuantizationParameters& gate_quantization_params) { + GateData + quantized_gate_data; + tflite::SymmetricQuantize(gate_parameters.activation_weight, + quantized_gate_data.activation_weight, + state_dimension * input_dimension, + gate_quantization_params.activation_weight.scale); + tflite::SymmetricQuantize(gate_parameters.recurrent_weight, + quantized_gate_data.recurrent_weight, + state_dimension * state_dimension, + gate_quantization_params.recurrent_weight.scale); + tflite::SymmetricQuantize(gate_parameters.fused_bias, + quantized_gate_data.fused_bias, state_dimension, + gate_quantization_params.bias.scale); + + // Copy the bias values to prepare zero_point folded bias precomputation. bias + // has same scale as input_scale*input_weight_scale) + std::memcpy(quantized_gate_data.activation_zp_folded_bias, + quantized_gate_data.fused_bias, + state_dimension * sizeof(BiasType)); + // Pre-calculate bias - zero_point * weight (a constant). 
+ tflite::tensor_utils::MatrixScalarMultiplyAccumulate( + quantized_gate_data.activation_weight, + -1 * input_quantization_params.zero_point, state_dimension, + input_dimension, quantized_gate_data.activation_zp_folded_bias); + + // Initialize the folded bias to zeros for accumulation + for (size_t i = 0; i < state_dimension; i++) { + quantized_gate_data.recurrent_zp_folded_bias[i] = 0; + } + // Calculate : -zero_point * weight since it is a constant + tflite::tensor_utils::MatrixScalarMultiplyAccumulate( + quantized_gate_data.recurrent_weight, + -1 * output_quantization_params.zero_point, state_dimension, + state_dimension, quantized_gate_data.recurrent_zp_folded_bias); + + return quantized_gate_data; +} + +// Create integer LSTM node content from the float node contents and +// quantization settings +template +LstmNodeContents +CreateIntegerNodeContents( + const NodeQuantizationParameters& quantization_settings, + LstmNodeContents& float_node_contents) { + const auto quantized_forget_gate_data = + CreateQuantizedGateData( + float_node_contents.ForgetGateData(), quantization_settings.input, + quantization_settings.output, quantization_settings.forget_gate); + const auto quantized_input_gate_data = + CreateQuantizedGateData( + float_node_contents.InputGateData(), quantization_settings.input, + quantization_settings.output, quantization_settings.input_gate); + const auto quantized_cell_gate_data = + CreateQuantizedGateData( + float_node_contents.CellGateData(), quantization_settings.input, + quantization_settings.output, quantization_settings.cell_gate); + const auto quantized_output_gate_params = + CreateQuantizedGateData( + float_node_contents.OutputGateData(), quantization_settings.input, + quantization_settings.output, quantization_settings.output_gate); + LstmNodeContents + quantized_node_content( + float_node_contents.BuiltinData(), quantized_forget_gate_data, + quantized_input_gate_data, quantized_cell_gate_data, + quantized_output_gate_params); + + // Quantize the floating point input + ActivationType quantized_input[batch_size * input_dimension * time_steps] = + {}; + Quantize(float_node_contents.GetInputData(), quantized_input, + batch_size * input_dimension * time_steps, + quantization_settings.input.scale, + quantization_settings.input.zero_point); + quantized_node_content.SetInputData(quantized_input); + // Quantize the floating point hidden state + ActivationType quantized_hidden_state[batch_size * state_dimension] = {}; + Quantize(float_node_contents.GetHiddenStateData(), quantized_hidden_state, + batch_size * state_dimension, + quantization_settings.hidden_state.scale, + quantization_settings.hidden_state.zero_point); + quantized_node_content.SetHiddenStateData(quantized_hidden_state); + // Quantize the floating point cell state + CellType quantized_cell_state[batch_size * state_dimension] = {}; + Quantize(float_node_contents.GetCellStateData(), quantized_cell_state, + batch_size * state_dimension, quantization_settings.cell_state.scale, + quantization_settings.cell_state.zero_point); + quantized_node_content.SetCellStateData(quantized_cell_state); + + // Add scale and zero point to tensors + quantized_node_content.AddQuantizationParameters(quantization_settings); + return quantized_node_content; +} + +// Get the gate output data (one time step) for a simple 2X2 model +// batch_size = 2; time_steps = 1; input_dimension = 2; state_dimension = 2 +// input_size = batch_size*time_steps*input_dimension = 4 +// gate_output_size = batch_size*state_dimension = 4 
+GateOutputCheckData<4, 4> Get2X2GateOutputCheckData(); + +// Get the kernel output data for a simple 2X2 model +// batch_size = 2; time_steps = 3; input_dimension = 2; state_dimension = 2 +// input_size = batch_size*time_steps*input_dimension = 12 +// gate_output_size = batch_size*state_dimension = 4 +// output_size = time_steps*gate_output_size = 12 +LstmEvalCheckData<12, 4, 12> Get2X2LstmEvalCheckData(); + +// Create a 2x2 float node content +// batch_size = 2; time_steps = 3; input_dimension = 2; state_dimension = 2 +LstmNodeContents +Create2x3x2X2FloatNodeContents(const float* input_data = nullptr, + const float* hidden_state = nullptr, + const float* cell_state = nullptr); + +// Get the quantization settings for the 2X2 model +NodeQuantizationParameters Get2X2Int8LstmQuantizationSettings(); + +// Create int8 (activation) x int8 (weight) -> int16 (cell) node +// batch_size = 2; time_steps = 3; input_dimension = 2; state_dimension = 2 +// input is in float format since the source of truth is always the float +// configuration +LstmNodeContents +Create2x3x2X2Int8NodeContents(const float* input_data = nullptr, + const float* hidden_state = nullptr, + const float* cell_state = nullptr); + +} // namespace testing +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_TESTDATA_LSTM_TEST_DATA_H_ diff --git a/third_party/tflite-micro/tensorflow/lite/micro/micro_mutable_op_resolver.h b/third_party/tflite-micro/tensorflow/lite/micro/micro_mutable_op_resolver.h index a4d50c83a..7a231ff95 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/micro_mutable_op_resolver.h +++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_mutable_op_resolver.h @@ -399,8 +399,7 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddPack() { - return AddBuiltin(BuiltinOperator_PACK, tflite::ops::micro::Register_PACK(), - ParsePack); + return AddBuiltin(BuiltinOperator_PACK, Register_PACK(), ParsePack); } TfLiteStatus AddPad(const TfLiteRegistration& registration = Register_PAD()) { diff --git a/third_party/tflite-micro/tensorflow/lite/micro/micro_profiler.cc b/third_party/tflite-micro/tensorflow/lite/micro/micro_profiler.cc index 9f6fc74c9..e9eb5e549 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/micro_profiler.cc +++ b/third_party/tflite-micro/tensorflow/lite/micro/micro_profiler.cc @@ -52,7 +52,7 @@ void MicroProfiler::Log() const { #if !defined(TF_LITE_STRIP_ERROR_STRINGS) for (int i = 0; i < num_events_; ++i) { uint32_t ticks = end_ticks_[i] - start_ticks_[i]; - MicroPrintf("%s took %u ticks (%d ms).", tags_[i], ticks, + MicroPrintf("%s took %" PRIu32 " ticks (%d ms).", tags_[i], ticks, TicksToMs(ticks)); } #endif @@ -63,7 +63,7 @@ void MicroProfiler::LogCsv() const { MicroPrintf("\"Event\",\"Tag\",\"Ticks\""); for (int i = 0; i < num_events_; ++i) { uint32_t ticks = end_ticks_[i] - start_ticks_[i]; - MicroPrintf("%d,%s,%u", i, tags_[i], ticks); + MicroPrintf("%d,%s,%" PRIu32, i, tags_[i], ticks); } #endif } diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifimini.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifimini.sh new file mode 100755 index 000000000..abfe651c6 --- /dev/null +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifimini.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# Called with the following arguments: +# 1 - (optional) TENSORFLOW_ROOT: path to root of the TFLM tree +# (relative to the directory from where the script is called). +# 2 - (optional) EXTERNAL_DIR: path to the external directory that contains external code + +set -e +pwd + +TENSORFLOW_ROOT=${1} +EXTERNAL_DIR=${2} + +source ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/ci_build/helper_functions.sh + +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile clean TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +# TODO(b/143904317): downloading first to allow for parallel builds. +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile third_party_downloads TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} + +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile \ + TARGET=xtensa \ + TARGET_ARCH=hifimini \ + OPTIMIZED_KERNEL_DIR=xtensa \ + XTENSA_CORE=mini1m1m_RG \ + TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \ + EXTERNAL_DIR=${EXTERNAL_DIR} \ + build -j$(nproc) + +readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile \ + TARGET=xtensa \ + TARGET_ARCH=hifimini \ + OPTIMIZED_KERNEL_DIR=xtensa \ + XTENSA_CORE=mini1m1m_RG \ + TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \ + EXTERNAL_DIR=${EXTERNAL_DIR} \ + test -j$(nproc) \ No newline at end of file diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver/templates/micro_mutable_op_resolver_test.cc.mako b/third_party/tflite-micro/tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver/templates/micro_mutable_op_resolver_test.cc.mako index 68176c566..3de4ef406 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver/templates/micro_mutable_op_resolver_test.cc.mako +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver/templates/micro_mutable_op_resolver_test.cc.mako @@ -75,7 +75,7 @@ void RunModel(const uint8_t* model, TfLiteTensor* output_tensor = interpreter.output(0); TF_LITE_MICRO_EXPECT_EQ(output_tensor->bytes, golden_size * sizeof(int8_t)); - int8_t* output = GetTensorData<int8_t>(output_tensor); + int8_t* output = ::tflite::GetTensorData<int8_t>(output_tensor); for (uint32_t i = 0; i < golden_size; i++) { // TODO(b/205046520): Better understand why TfLite and TFLM can sometimes be // off by 1.
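The golden gate values in lstm_test_data.cc above are a single matrix step per gate, output = activation(W_x*x + W_h*h + b), evaluated on the fixed 2x2 weights. A quick standalone check (illustrative code, not part of the patch) reproduces the batch-1 forget-gate numbers 6.69285092e-03 and 4.53978687e-05 quoted in the comments:

#include <cmath>
#include <cstdio>

int main() {
  // Forget gate parameters from Create2x3x2X2FloatNodeContents():
  // W_x = W_h = [[-10, -10], [-20, -20]], b = [1, 2];
  // batch-1 input x = [0.2, 0.3], hidden state h = [-0.1, 0.2].
  const float w[2][2] = {{-10, -10}, {-20, -20}};
  const float x[2] = {0.2f, 0.3f};
  const float h[2] = {-0.1f, 0.2f};
  const float b[2] = {1.0f, 2.0f};
  for (int row = 0; row < 2; ++row) {
    float acc = b[row];
    for (int col = 0; col < 2; ++col) {
      acc += w[row][col] * x[col] + w[row][col] * h[col];
    }
    // acc comes out as -5 and -10; the sigmoid gives ~6.692851e-03 and
    // ~4.539787e-05, matching expected_forget_gate_output for batch 1.
    std::printf("forget_gate[%d] = %e\n", row, 1.0f / (1.0f + std::exp(-acc)));
  }
  return 0;
}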
diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/person_detection_int8_vela_convert.sh b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/person_detection_int8_vela_convert.sh index da51d756a..665c6a62e 100755 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/person_detection_int8_vela_convert.sh +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/person_detection_int8_vela_convert.sh @@ -55,6 +55,8 @@ if [ ! -f ${CONVERTED_PERSON_MODEL_INT8} ]; then source $TEMPFILE/bin/activate python3 -m pip install --upgrade pip >&2 pip install --upgrade cython >&2 + pip install numpy==1.21.3 >&2 # Some types are removed in the latest numpy. + # Use an older version until the ethos-u-vela package is updated. pip install --prefer-binary ethos-u-vela >&2 vela --accelerator-config=ethos-u55-256 ${DOWNLOADS_DIR}/../../../models/person_detect.tflite \ --output-dir ${MODEL_DIR} >&2 diff --git a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/xtensa.inc b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/xtensa.inc index 20ee1e4e0..e21bcf953 100644 --- a/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/xtensa.inc +++ b/third_party/tflite-micro/tensorflow/lite/micro/tools/make/ext_libs/xtensa.inc @@ -16,7 +16,13 @@ MICROLITE_CC_KERNEL_SRCS += \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/softmax_int8_int16.cc \ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/softmax_vision.cc -ifeq ($(TARGET_ARCH), hifi5) +ifeq ($(TARGET_ARCH), hifimini) + # hifimini optimizations are implemented in the TFLM repository itself. + THIRD_PARTY_KERNEL_CC_SRCS += \ + $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/hifimini/svdf.cc \ + $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/xtensa/hifimini/fully_connected.cc + +else ifeq ($(TARGET_ARCH), hifi5) DOWNLOAD_RESULT := $(shell $(MAKEFILE_DIR)/ext_libs/xtensa_download.sh ${DOWNLOADS_DIR} hifi5 $(TENSORFLOW_ROOT)) ifneq ($(DOWNLOAD_RESULT), SUCCESS) $(error Something went wrong with the xtensa download: $(DOWNLOAD_RESULT)) @@ -124,7 +130,7 @@ else ifeq ($(TARGET_ARCH), vision_p6) INCLUDES += \ -I$(NNLIB_PATH)/flk/include \ -I$(NNLIB_PATH)/kernels/include/ \ - -I$(NNLIB_PATH)/runtime/include/ + -I$(NNLIB_PATH)/runtime/include/ LDFLAGS += -lidma else @@ -141,4 +147,10 @@ THIRD_PARTY_KERNEL_CC_SRCS += \ THIRD_PARTY_CC_HDRS += \ $(shell find $(FFT_PATH)/hifi3_fft -name "*.h") +else ifeq ($(TARGET_ARCH), hifimini) +THIRD_PARTY_KERNEL_CC_SRCS += \ + $(shell find $(FFT_PATH)/hifi2_fft -name "*.c") + +THIRD_PARTY_CC_HDRS += \ + $(shell find $(FFT_PATH)/hifi2_fft -name "*.h") endif
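The zero-point folding in CreateQuantizedGateData() above rests on the identity sum_j w[i][j]*(q[j] - zp) = sum_j w[i][j]*q[j] - zp*sum_j w[i][j]: the second term is a constant, so it is folded into the bias once (this is what the MatrixScalarMultiplyAccumulate calls with -zero_point accumulate), and the kernel can then multiply the raw quantized activations directly. A minimal sketch of the same precomputation with made-up weights and biases (not the patch's actual code path):

#include <cstdint>
#include <cstdio>

int main() {
  // Illustrative 2x2 weights and int32 biases; zero_point -21 mirrors the
  // hidden-state zero point in Get2X2Int8LstmQuantizationSettings().
  const int8_t weight[2][2] = {{3, -4}, {5, 6}};
  const int32_t bias[2] = {100, -50};
  const int32_t zero_point = -21;

  for (int row = 0; row < 2; ++row) {
    int32_t row_sum = 0;
    for (int col = 0; col < 2; ++col) {
      row_sum += weight[row][col];
    }
    // folded_bias = bias - zero_point * (sum of the weights in this row)
    const int32_t folded_bias = bias[row] - zero_point * row_sum;
    std::printf("row %d: folded bias = %d\n", row,
                static_cast<int>(folded_bias));
  }
  return 0;
}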