Skip to content

Commit

Permalink
Download Cadence NatureDSP from GitHub (#2241)
Browse files Browse the repository at this point in the history
Variants for hifi3, hifi4 and hifi5.

BUG=301551681
  • Loading branch information
shlmregev authored Oct 5, 2023
1 parent a438ba8 commit f391e68
Show file tree
Hide file tree
Showing 32 changed files with 436 additions and 130 deletions.
6 changes: 3 additions & 3 deletions signal/micro/kernels/fft_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -274,11 +274,11 @@ TF_LITE_MICRO_TEST(RfftTestSize512Int16) {
const TFLMRegistration* registration =
tflite::tflm_signal::Register_RFFT_INT16();
// See (b/287518815) for why this is needed.
#if defined(HIFI4) || defined(HIFI5)
#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
int tolerance = 9;
#else // defined(HIFI4) || defined(HIFI5)
#else // defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
int tolerance = 3;
#endif // defined(HIFI4) || defined(HIFI5)
#endif // defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
TF_LITE_MICRO_EXPECT_EQ(
kTfLiteOk, tflite::testing::TestFFT<int16_t>(
input_shape, tflite::kRfftInt16Length512Input,
Expand Down
4 changes: 4 additions & 0 deletions tensorflow/extra_rules.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ def xtensa_fusion_f1_config():
"""Config setting for all Fusion F1 based cores."""
return "//tensorflow/lite/micro/kernels:xtensa_fusion_f1_default"

def xtensa_hifi_3_config():
"""Config setting for all HiFi 3 based cores."""
return "//tensorflow/lite/micro/kernels:xtensa_hifi_3_default"

def xtensa_hifi_3z_config():
"""Config setting for all HiFi 3z based cores."""
return "//tensorflow/lite/micro/kernels:xtensa_hifi_3z_default"
Expand Down
18 changes: 18 additions & 0 deletions tensorflow/lite/micro/kernels/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ load(
"//tensorflow:extra_rules.bzl",
"tflm_kernel_friends",
"xtensa_fusion_f1_config",
"xtensa_hifi_3_config",
"xtensa_hifi_3z_config",
"xtensa_hifi_5_config",
"xtensa_vision_p6_config",
Expand Down Expand Up @@ -170,6 +171,11 @@ cc_library(
],
)

HIFI3_COPTS = [
"-DXTENSA=1",
"-DHIFI3=1",
]

HIFI4_COPTS = [
"-DXTENSA=1",
"-DHIFI4=1",
Expand Down Expand Up @@ -320,19 +326,22 @@ tflm_kernel_cc_library(
"svdf.h",
] + select({
xtensa_fusion_f1_config(): glob(["xtensa/**/*.h"]),
xtensa_hifi_3_config(): glob(["xtensa/**/*.h"]),
xtensa_hifi_3z_config(): glob(["xtensa/**/*.h"]),
xtensa_hifi_5_config(): glob(["xtensa/**/*.h"]),
xtensa_vision_p6_config(): glob(["xtensa/**/*.h"]),
"//conditions:default": [],
}),
accelerated_srcs = {
xtensa_fusion_f1_config(): glob(["xtensa/**/*.cc"]),
xtensa_hifi_3_config(): glob(["xtensa/**/*.cc"]),
xtensa_hifi_3z_config(): glob(["xtensa/**/*.cc"]),
xtensa_hifi_5_config(): glob(["xtensa/**/*.cc"]),
xtensa_vision_p6_config(): glob(["xtensa/**/*.cc"]),
},
copts = micro_copts() + select({
xtensa_fusion_f1_config(): HIFI4_COPTS,
xtensa_hifi_3_config(): HIFI3_COPTS,
xtensa_hifi_3z_config(): HIFI4_COPTS,
xtensa_hifi_5_config(): HIFI5_COPTS,
xtensa_vision_p6_config(): VP6_COPTS,
Expand Down Expand Up @@ -368,6 +377,7 @@ tflm_kernel_cc_library(
"@flatbuffers//:runtime_cc",
] + select({
xtensa_fusion_f1_config(): ["//third_party/xtensa/nnlib_hifi4:nnlib_hifi4_lib"],
xtensa_hifi_3_config(): ["//third_party/xtensa/nnlib_hifi4:nnlib_hifi4_lib"],
xtensa_hifi_3z_config(): ["//third_party/xtensa/nnlib_hifi4:nnlib_hifi4_lib"],
xtensa_hifi_5_config(): ["//third_party/xtensa/nnlib_hifi5:nnlib_hifi5_lib"],
xtensa_vision_p6_config(): ["//third_party/xtensa/xi_tflmlib_vision_p6:xi_tflmlib_vision_p6_lib"],
Expand Down Expand Up @@ -1463,6 +1473,7 @@ string_flag(
values = [
"",
"xtensa_fusion_f1",
"xtensa_hifi_3",
"xtensa_hifi_3z",
"xtensa_hifi_5",
"xtensa_vision_p6",
Expand All @@ -1476,6 +1487,13 @@ config_setting(
},
)

config_setting(
name = "xtensa_hifi_3_default",
flag_values = {
":optimized_kernels": "xtensa_hifi_3",
},
)

config_setting(
name = "xtensa_hifi_3z_default",
flag_values = {
Expand Down
16 changes: 8 additions & 8 deletions tensorflow/lite/micro/kernels/xtensa/add.cc
Original file line number Diff line number Diff line change
Expand Up @@ -113,11 +113,11 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
op_params.output_shift = data->output_shift;
SetActivationParams(data->output_activation_min, data->output_activation_max,
&op_params);
#if !(defined(HIFI4))
#if !(defined(HIFI3) || defined(HIFI4))
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
#endif // !defined(HIFI4)
#endif // !defined(HIFI3) && !defined(HIFI4)

switch (output->type) {
case kTfLiteInt8: {
Expand All @@ -126,7 +126,7 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
*(reinterpret_cast<XtensaAddOpData*>(node->user_data));
AddEvalQuantizedVision(context, node, *params, op_data, input1, input2,
output);
#elif defined(HIFI4) // defined(VISION_P6)
#elif defined(HIFI3) || defined(HIFI4) // defined(VISION_P6)
int err;
const RuntimeShape extended_input1_shape =
RuntimeShape::ExtendedShape(4, tflite::micro::GetTensorShape(input1));
Expand All @@ -150,7 +150,7 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
op_params.left_shift);

TF_LITE_ENSURE(context, err == 0);
#else // defined(VISION_P6)
#else // defined(VISION_P6)
if (need_broadcast) {
reference_integer_ops::BroadcastAdd4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
Expand All @@ -168,11 +168,11 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
#endif // defined(VISION_P6)
#endif // defined(VISION_P6)
break;
}
case kTfLiteInt16: {
#if defined(HIFI4)
#if defined(HIFI3) || defined(HIFI4)
int err;
const RuntimeShape extended_input1_shape =
RuntimeShape::ExtendedShape(4, tflite::micro::GetTensorShape(input1));
Expand All @@ -196,7 +196,7 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
op_params.left_shift);

TF_LITE_ENSURE(context, err == 0);
#else // defined(HIFI4)
#else // defined(HIFI3) || defined(HIFI4)
if (need_broadcast) {
reference_ops::BroadcastAdd4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
Expand All @@ -214,7 +214,7 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
tflite::micro::GetTensorData<int16_t>(output),
false);
}
#endif // defined(HIFI4)
#endif // defined(HIFI3) || defined(HIFI4)
break;
}
default:
Expand Down
4 changes: 2 additions & 2 deletions tensorflow/lite/micro/kernels/xtensa/conv.cc
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
break;
}
case kTfLiteInt8: {
#if defined(HIFI4) || defined(HIFI5)
#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
if (params.dilation_width_factor == 1 &&
params.dilation_height_factor == 1) {
return ConvEvalHifiInt8(context, node, params, op_data, input, filter,
Expand All @@ -91,7 +91,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
#endif
}
case kTfLiteInt16: {
#if defined(HIFI4)
#if defined(HIFI3) || defined(HIFI4)
// Note that int32 bias is not widely supported and might be risky (e.g.
// http://b/262003750). As such, while we have a fallback to the reference
// implementation, production use-cases should only have int64 bias.
Expand Down
4 changes: 2 additions & 2 deletions tensorflow/lite/micro/kernels/xtensa/conv_common_xtensa.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ void* ConvInitXtensa(TfLiteContext* context, const char* buffer,
TfLiteStatus ConvPrepareXtensa(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_OK(context, ConvPrepare(context, node));

#if defined(HIFI4) || defined(HIFI5)
#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
TF_LITE_ENSURE_OK(context, ConvPrepareHifi(context, node));
#endif // defined(HIFI4) || defined(HIFI5)
#endif // defined(HIFI3) || defined(HIFI4) || defined(HIFI5)

#if defined(VISION_P6)
TF_LITE_ENSURE_OK(context, ConvPrepareVision(context, node));
Expand Down
12 changes: 6 additions & 6 deletions tensorflow/lite/micro/kernels/xtensa/conv_hifi.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#if defined(HIFI4) || defined(HIFI5)
#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)

#include <cstdint>

Expand Down Expand Up @@ -55,14 +55,14 @@ TfLiteStatus ConvPrepareHifi(TfLiteContext* context, TfLiteNode* node) {
/* TODO(b/277112516): Dilation is currently not supported on HiFi 4 NN Library
*/
bool inputs_and_bias_ok = bias != nullptr;
#ifdef HIFI4
#if defined(HIFI3) || defined(HIFI4)
inputs_and_bias_ok =
inputs_and_bias_ok &&
(input->type == kTfLiteInt8 ||
(input->type == kTfLiteInt16 && bias->type == kTfLiteInt64));
#else
inputs_and_bias_ok = inputs_and_bias_ok && (input->type == kTfLiteInt8);
#endif // HIFI4
#endif // defined(HIFI3) || defined(HIFI4)
if (!(inputs_and_bias_ok && params->dilation_width_factor == 1 &&
params->dilation_height_factor == 1 &&
input_shape.Dims(1) >= filter_shape.Dims(1) &&
Expand Down Expand Up @@ -115,7 +115,7 @@ TfLiteStatus ConvPrepareHifi(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}

#if defined(HIFI4)
#if defined(HIFI3) || defined(HIFI4)
TfLiteStatus ConvEvalHifiInt16(TfLiteContext* context, TfLiteNode* node,
const TfLiteConvParams& params,
const XtensaConvOpData& data,
Expand Down Expand Up @@ -210,7 +210,7 @@ TfLiteStatus ConvEvalHifiInt16(TfLiteContext* context, TfLiteNode* node,

return kTfLiteOk;
}
#endif // defined(HIFI4)
#endif // defined(HIFI3) || defined(HIFI4)

TfLiteStatus ConvEvalHifiInt8(TfLiteContext* context, TfLiteNode* node,
const TfLiteConvParams& params,
Expand Down Expand Up @@ -325,4 +325,4 @@ TfLiteStatus ConvEvalHifiInt8(TfLiteContext* context, TfLiteNode* node,
}

} // namespace tflite
#endif // defined(HIFI4) || defined(HIFI5)
#endif // defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
4 changes: 2 additions & 2 deletions tensorflow/lite/micro/kernels/xtensa/conv_int8_int16.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ TfLiteStatus EvalInt8(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* bias =
tflite::micro::GetEvalInput(context, node, kConvBiasTensor);

#if defined(HIFI4) || defined(HIFI5)
#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
return ConvEvalHifiInt8(context, node, params, op_data, input, filter, bias,
output);
#elif defined(VISION_P6)
Expand All @@ -54,7 +54,7 @@ TfLiteStatus EvalInt8(TfLiteContext* context, TfLiteNode* node) {
}

TfLiteStatus EvalInt16(TfLiteContext* context, TfLiteNode* node) {
#if defined(HIFI4)
#if defined(HIFI3) || defined(HIFI4)
const auto& op_data = *(reinterpret_cast<XtensaConvOpData*>(node->user_data));
const auto& params =
*(reinterpret_cast<TfLiteConvParams*>(node->builtin_data));
Expand Down
8 changes: 4 additions & 4 deletions tensorflow/lite/micro/kernels/xtensa/depthwise_conv.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
}
micro_context->DeallocateTempTfLiteTensor(input);

#if defined(HIFI4) || defined(HIFI5)
#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
TF_LITE_ENSURE_OK(context, DepthwiseConvPrepareHifi(context, node));
#endif // defined(HIFI4) || defined(HIFI5)
#endif // defined(HIFI3) || defined(HIFI4) || defined(HIFI5)

#if defined(VISION_P6)
TF_LITE_ENSURE_OK(context, DepthwiseConvPrepareVision(context, node));
Expand Down Expand Up @@ -97,7 +97,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
case kTfLiteInt8: {
switch (filter_int8.type) {
case kTfLiteInt8: {
#if defined(HIFI4) || defined(HIFI5)
#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
DepthwiseConvEvalHifi(context, node, params, op_data, input,
&filter_int8, bias, output);
#elif defined(VISION_P6)
Expand All @@ -116,7 +116,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
tflite::micro::GetOptionalTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
#endif // defined(HIFI4) || defined(HIFI5)
#endif // defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
break;
}
default:
Expand Down
4 changes: 2 additions & 2 deletions tensorflow/lite/micro/kernels/xtensa/depthwise_conv_hifi.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ limitations under the License.
#include "tensorflow/lite/micro/kernels/xtensa/xtensa.h"
#include "tensorflow/lite/micro/kernels/xtensa/xtensa_depthwise_conv.h"

#if defined(HIFI4) || defined(HIFI5)
#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
namespace tflite {
TfLiteStatus DepthwiseConvPrepareHifi(TfLiteContext* context,
TfLiteNode* node) {
Expand Down Expand Up @@ -187,4 +187,4 @@ TfLiteStatus DepthwiseConvEvalHifi(TfLiteContext* context, TfLiteNode* node,
return kTfLiteOk;
}
} // namespace tflite
#endif // defined(HIFI4) || defined(HIFI5)
#endif  // defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
4 changes: 2 additions & 2 deletions tensorflow/lite/micro/kernels/xtensa/fully_connected_int8.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ TfLiteStatus XtensaEvalFullyConnectedQuantizedInt8(
tflite::micro::GetTensorShape(bias), bias_data,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
#elif defined(HIFI4) || defined(HIFI5)
#elif defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
const RuntimeShape& output_shape = tflite::micro::GetTensorShape(output);
const int num_batches =
FlatSizeSkipDim(output_shape, output_shape.DimensionsCount() - 1);
Expand Down Expand Up @@ -103,7 +103,7 @@ TfLiteStatus XtensaEvalFullyConnectedQuantizedInt8(
tflite::micro::GetTensorShape(bias), bias_data,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
#endif // defined(HIFI4) || defined(HIFI5)
#endif // defined(HIFI3) || defined(HIFI4) || defined(HIFI5)

return kTfLiteOk;
}
Expand Down
4 changes: 2 additions & 2 deletions tensorflow/lite/micro/kernels/xtensa/leaky_relu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ TfLiteStatus LeakyReluEval(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
} break;
case kTfLiteInt16: {
#if defined(HIFI4)
#if defined(HIFI3) || defined(HIFI4)
const RuntimeShape& input_shape = tflite::micro::GetTensorShape(input);
const RuntimeShape& output_shape = tflite::micro::GetTensorShape(output);
const int flat_size = MatchingFlatSize(input_shape, output_shape);
Expand All @@ -89,7 +89,7 @@ TfLiteStatus LeakyReluEval(TfLiteContext* context, TfLiteNode* node) {
if (err != 0) return kTfLiteError;
#else
QuantizeLeakyRelu<int16_t>(data, input, output);
#endif // defined(HIFI4)
#endif // defined(HIFI3) || defined(HIFI4)
return kTfLiteOk;
} break;
default:
Expand Down
8 changes: 4 additions & 4 deletions tensorflow/lite/micro/kernels/xtensa/logistic.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {

switch (input->type) {
case kTfLiteFloat32: {
#if HIFI_VFPU && (defined(HIFI4) || defined(HIFI5))
#if HIFI_VFPU && (defined(HIFI3) || defined(HIFI4) || defined(HIFI5))
const RuntimeShape& input_shape = tflite::micro::GetTensorShape(input);
const RuntimeShape& output_shape = tflite::micro::GetTensorShape(output);
const int flat_size = MatchingFlatSize(input_shape, output_shape);
Expand All @@ -70,11 +70,11 @@ TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
#endif // HIFI_VFPU && (defined(HIFI4) || defined(HIFI5))
#endif // HIFI_VFPU && (defined(HIFI3) || defined(HIFI4) || defined(HIFI5))
break;
}
case kTfLiteInt8: {
#if defined(HIFI4) || defined(HIFI5)
#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
const RuntimeShape& input_shape = tflite::micro::GetTensorShape(input);
const RuntimeShape& output_shape = tflite::micro::GetTensorShape(output);
const int flat_size = MatchingFlatSize(input_shape, output_shape);
Expand All @@ -96,7 +96,7 @@ TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
data->input_multiplier, data->input_left_shift,
NumElements(input->dims), tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorData<int8_t>(output));
#endif // defined(HIFI4) || defined(HIFI5)
#endif // defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
break;
}
case kTfLiteInt16: {
Expand Down
Loading

0 comments on commit f391e68

Please sign in to comment.