diff --git a/tensorflow/lite/micro/kernels/concatenation.cc b/tensorflow/lite/micro/kernels/concatenation.cc
index 57d63a916a1..151d3b47ed5 100644
--- a/tensorflow/lite/micro/kernels/concatenation.cc
+++ b/tensorflow/lite/micro/kernels/concatenation.cc
@@ -1,4 +1,4 @@
-/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -33,6 +33,13 @@ constexpr int kOutputTensor = 0;
 
 struct OpData {
   ConcatenationParams params;
+
+#ifdef USE_TFLM_COMPRESSION
+
+  // scratch buffers for compressed tensors
+  int scratch_indices[kMaxInputNum];
+
+#endif  // USE_TFLM_COMPRESSION
 };
 
 // Handles negative axis index, coerces to positive index value.
@@ -52,8 +59,6 @@ inline int CalculatePositiveAxis(int axis, const TfLiteTensor* output_tensor) {
 inline void GetAllInputTensorShapes(const TfLiteContext* context,
                                     const TfLiteNode* node,
                                     RuntimeShape all_shapes[kMaxInputNum]) {
-  TFLITE_DCHECK(context != nullptr);
-  TFLITE_DCHECK(node != nullptr);
   for (int i = 0; i < node->inputs->size; ++i) {
     const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i);
     RuntimeShape shape = tflite::micro::GetTensorShape(t);
@@ -73,12 +78,22 @@ inline void GetShapesPointers(const RuntimeShape* shapes, size_t num,
 template <typename T>
 inline void GetAllInputTensorData(const TfLiteContext* context,
                                   const TfLiteNode* node,
-                                  T* all_data[kMaxInputNum]) {
-  TFLITE_DCHECK(context != nullptr);
-  TFLITE_DCHECK(node != nullptr);
+                                  const T* all_data[kMaxInputNum]) {
+#ifdef USE_TFLM_COMPRESSION
+  const OpData* data = static_cast<const OpData*>(node->user_data);
+  MicroContext* micro_context = GetMicroContext(context);
+#endif  // USE_TFLM_COMPRESSION
+
   for (int i = 0; i < node->inputs->size; ++i) {
     const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i);
+#ifdef USE_TFLM_COMPRESSION
+    const CompressionTensorData* comp_td =
+        micro_context->GetTensorCompressionData(node, i);
+    all_data[i] = tflite::micro::GetTensorData<T>(micro_context, t, comp_td,
+                                                  data->scratch_indices[i]);
+#else  // USE_TFLM_COMPRESSION
     all_data[i] = tflite::micro::GetTensorData<T>(t);
+#endif  // USE_TFLM_COMPRESSION
   }
 }
 
@@ -88,6 +103,10 @@ void EvalUnquantized(TfLiteContext* context, TfLiteNode* node) {
   RuntimeShape inputs_shape[kMaxInputNum];
   const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
   const data_type* inputs_data[kMaxInputNum];
+  TFLITE_DCHECK(context != nullptr);
+  TFLITE_DCHECK(node != nullptr);
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const OpData* data = static_cast<const OpData*>(node->user_data);
   GetAllInputTensorShapes(context, node, inputs_shape);
   GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
   GetAllInputTensorData(context, node, inputs_data);
@@ -95,9 +114,6 @@ void EvalUnquantized(TfLiteContext* context, TfLiteNode* node) {
   TfLiteEvalTensor* output =
       tflite::micro::GetEvalOutput(context, node, kOutputTensor);
 
-  TFLITE_DCHECK(node->user_data != nullptr);
-  const OpData* data = static_cast<const OpData*>(node->user_data);
-
   reference_ops::Concatenation(data->params, inputs_shape_ptr, inputs_data,
                                tflite::micro::GetTensorShape(output),
                                tflite::micro::GetTensorData<data_type>(output));
@@ -126,7 +142,6 @@ TfLiteStatus ConcatenationPrepare(TfLiteContext* context, TfLiteNode* node) {
   TfLiteType output_type = output_tensor->type;
 
   micro_context->DeallocateTempTfLiteTensor(input_tensor);
-  micro_context->DeallocateTempTfLiteTensor(output_tensor);
 
   // Check activation and input type
   TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
@@ -136,16 +151,22 @@ TfLiteStatus ConcatenationPrepare(TfLiteContext* context, TfLiteNode* node) {
                      input_type == kTfLiteInt64 || input_type == kTfLiteBool);
 
   // Output type must match input type
-  TF_LITE_ENSURE_EQ(context, output_type, input_type);
+  TF_LITE_ENSURE_TYPES_EQ(context, output_type, input_type);
 
   // This implementation does not support large number of input tensors
   const int num_inputs = NumInputs(node);
   TF_LITE_ENSURE(context, num_inputs <= kMaxInputNum);
 
-  // Shapes with dimensions >4 are not yet supported with static allocation.
+  // Calculate OpData.
+  TFLITE_DCHECK(node->user_data != nullptr);
+  OpData* data = static_cast<OpData*>(node->user_data);
+
+  // Shapes with dimensions > kMaxSmallSize are not yet supported with static
+  // allocation.
   for (int i = 0; i < num_inputs; ++i) {
     TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, i);
     TF_LITE_ENSURE(context, input != nullptr);
+    TF_LITE_ENSURE_TYPES_EQ(context, input->type, input_type);
     int num_dimensions = NumDimensions(input);
 
     if (num_dimensions > RuntimeShape::kMaxSmallSize) {
@@ -155,62 +176,53 @@ TfLiteStatus ConcatenationPrepare(TfLiteContext* context, TfLiteNode* node) {
           RuntimeShape::kMaxSmallSize, num_dimensions);
       return kTfLiteError;
     }
+
+    if (input_type == kTfLiteInt8) {
+      // Make sure there is no re-scaling needed for Int8 quantized kernel. This
+      // is a restriction we introduced to Int8 kernels.
+      TF_LITE_ENSURE_EQ(context, static_cast<double>(input->params.scale),
+                        static_cast<double>(output_tensor->params.scale));
+      TF_LITE_ENSURE_EQ(context, input->params.zero_point,
+                        output_tensor->params.zero_point);
+    } else if (input_type == kTfLiteInt16) {
+      // Make sure that all Int16 inputs have a null zero-point.
+      TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
+    }
+
+#ifdef USE_TFLM_COMPRESSION
+
+    // Compression scratch buffers.
+    // These will only be allocated if the tensor is compressed.
+    data->scratch_indices[i] =
+        micro_context->AllocateDecompressionScratchBuffer(node, i);
+
+#endif  // USE_TFLM_COMPRESSION
+
     micro_context->DeallocateTempTfLiteTensor(input);
   }
 
-  // Calculate OpData.
-  TFLITE_DCHECK(node->user_data != nullptr);
-  OpData* data = static_cast<OpData*>(node->user_data);
-
-  TfLiteTensor* output =
-      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
-  TF_LITE_ENSURE(context, output != nullptr);
+  if (input_type == kTfLiteInt16) {
+    TF_LITE_ENSURE_EQ(context, output_tensor->params.zero_point, 0);
+  }
 
   switch (output_type) {  // Already know in/outtypes are same.
     case kTfLiteBool:
     case kTfLiteFloat32:
+    case kTfLiteInt8:
     case kTfLiteInt16:
     case kTfLiteInt32:
     case kTfLiteInt64: {
-      data->params.axis = CalculatePositiveAxis(params->axis, output);
-      data->params.inputs_count = node->inputs->size;
-      break;
-    }
-    case kTfLiteInt8: {
-      data->params.axis = CalculatePositiveAxis(params->axis, output);
+      data->params.axis = CalculatePositiveAxis(params->axis, output_tensor);
       data->params.inputs_count = node->inputs->size;
-
-      float* input_scales =
-          reinterpret_cast<float*>(context->AllocatePersistentBuffer(
-              context, node->inputs->size * sizeof(float)));
-
-      int32_t* input_zero_points =
-          reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
-              context, node->inputs->size * sizeof(int32_t)));
-
-      // Allocate persistent scale and zeropoint buffers.
-      // Store input scale and zero point values in OpParams:
-      for (int i = 0; i < node->inputs->size; ++i) {
-        TfLiteTensor* t = micro_context->AllocateTempInputTensor(node, i);
-        TF_LITE_ENSURE(context, t != nullptr);
-        input_scales[i] = t->params.scale;
-        input_zero_points[i] = t->params.zero_point;
-        micro_context->DeallocateTempTfLiteTensor(t);
-      }
-
-      data->params.input_scale = input_scales;
-      data->params.input_zeropoint = input_zero_points;
-      data->params.output_zeropoint = output->params.zero_point;
-      data->params.output_scale = output->params.scale;
       break;
     }
     default:
-      MicroPrintf("Op Concatenation does not currently support Type '%s'.",
+      MicroPrintf("Op Concatenation does not currently support type '%s'.",
                   TfLiteTypeGetName(output_type));
       return kTfLiteError;
   }
 
-  micro_context->DeallocateTempTfLiteTensor(output);
+  micro_context->DeallocateTempTfLiteTensor(output_tensor);
 
   return kTfLiteOk;
 }
diff --git a/tensorflow/lite/micro/kernels/concatenation_test.cc b/tensorflow/lite/micro/kernels/concatenation_test.cc
index ddbc74d4aa4..c7e698007ea 100644
--- a/tensorflow/lite/micro/kernels/concatenation_test.cc
+++ b/tensorflow/lite/micro/kernels/concatenation_test.cc
@@ -1,4 +1,4 @@
-/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #include <cstdint>
+#include <type_traits>
 
 #include "tensorflow/lite/c/builtin_op_data.h"
 #include "tensorflow/lite/c/common.h"
@@ -56,9 +57,14 @@ void TestConcatenateOneInput(int* input1_dims_data, const T* input1_data,
 }
 
 template <typename T>
-void TestConcatenateTwoInputs(int* input1_dims_data, const T* input1_data,
-                              int* input2_dims_data, const T* input2_data,
-                              int axis, int* output_dims_data, T* output_data) {
+void TestConcatenateTwoInputs(
+    int* input1_dims_data, const T* input1_data, int* input2_dims_data,
+    const T* input2_data, int axis, int* output_dims_data, T* output_data
+#ifdef USE_TFLM_COMPRESSION
+    ,
+    const TestCompressionInfo<const T> (*comp_info)[2] = nullptr
+#endif  // USE_TFLM_COMPRESSION
+) {
   TfLiteIntArray* input1_dims = IntArrayFromInts(input1_dims_data);
   TfLiteIntArray* input2_dims = IntArrayFromInts(input2_dims_data);
   TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data);
@@ -70,6 +76,21 @@ void TestConcatenateTwoInputs(int* input1_dims_data, const T* input1_data,
       CreateTensor(input2_data, input2_dims),
       CreateTensor(output_data, output_dims)};
 
+#ifdef USE_TFLM_COMPRESSION
+
+  TestCompressedList<2> tcl;
+  const CompressedTensorList* comp_list_p = nullptr;
+
+  if (comp_info != nullptr) {
+    TF_LITE_MICRO_EXPECT_EQ(tcl.AddInput((*comp_info)[0], tensors[0], 0),
+                            kTfLiteOk);
+    TF_LITE_MICRO_EXPECT_EQ(tcl.AddInput((*comp_info)[1], tensors[1], 1),
+                            kTfLiteOk);
+    comp_list_p = tcl.GetCompressedTensorList();
+  }
+
+#endif  // USE_TFLM_COMPRESSION
+
   int inputs_array_data[] = {2, 0, 1};
   TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data);
   int outputs_array_data[] = {1, 2};
   TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data);
@@ -83,7 +104,12 @@ void TestConcatenateTwoInputs(int* input1_dims_data, const T* input1_data,
   const TFLMRegistration registration = Register_CONCATENATION();
   micro::KernelRunner runner(registration, tensors, tensors_size,
                              inputs_array, outputs_array,
-                             reinterpret_cast<void*>(&builtin_data));
+                             reinterpret_cast<void*>(&builtin_data)
+#ifdef USE_TFLM_COMPRESSION
+                             ,
+                             nullptr, comp_list_p
+#endif  // USE_TFLM_COMPRESSION
+  );
 
   TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare());
   TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke());
@@ -92,9 +118,19 @@ void TestConcatenateTwoFloatInputs(
     int* input1_dims_data, const float* input1_data, int* input2_dims_data,
     const float* input2_data, int axis, int* output_dims_data,
-    const float* expected_output_data, float* output_data) {
+    const float* expected_output_data, float* output_data
+#ifdef USE_TFLM_COMPRESSION
+    ,
+    const TestCompressionInfo<const float> (*comp_info)[2] = nullptr
+#endif  // USE_TFLM_COMPRESSION
+) {
   TestConcatenateTwoInputs(input1_dims_data, input1_data, input2_dims_data,
-                           input2_data, axis, output_dims_data, output_data);
+                           input2_data, axis, output_dims_data, output_data
+#ifdef USE_TFLM_COMPRESSION
+                           ,
+                           comp_info
+#endif  // USE_TFLM_COMPRESSION
+  );
 
   TfLiteIntArray* dims = tflite::testing::IntArrayFromInts(output_dims_data);
   const int output_dims_count = ElementCount(*dims);
@@ -148,6 +184,68 @@ void TestConcatenateQuantizedTwoInputs(
   }
 }
 
+#ifdef USE_TFLM_COMPRESSION
+
+template <typename T>
+void TestConcatenateQuantizedTwoInputsCompressed(
+    int* input1_dims_data, const uint8_t* input1_data, int* input2_dims_data,
+    const uint8_t* input2_data, const float input_scale,
+    const int input_zero_point, int axis, int* output_dims_data,
+    const T* expected_output_data, const float output_scale,
+    const int output_zero_point, T* output_data,
+    const TestCompressionInfo<const T> (&comp_info)[2]) {
+  TfLiteIntArray* input1_dims = IntArrayFromInts(input1_dims_data);
+  TfLiteIntArray* input2_dims = IntArrayFromInts(input2_dims_data);
+  TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data);
+
+  constexpr int input_size = 2;
+  constexpr int output_size = 1;
+  constexpr int tensors_size = input_size + output_size;
+  TfLiteTensor tensors[tensors_size] = {
+      CreateQuantizedTensor(input1_data, input1_dims, input_scale,
+                            input_zero_point, false, typeToTfLiteType<T>()),
+      CreateQuantizedTensor(input2_data, input2_dims, input_scale,
+                            input_zero_point, false, typeToTfLiteType<T>()),
+      CreateQuantizedTensor(output_data, output_dims, output_scale,
+                            output_zero_point)};
+
+#ifdef USE_TFLM_COMPRESSION
+
+  TestCompressedList<2> tcl;
+  const CompressedTensorList* comp_list_p = nullptr;
+
+  TF_LITE_MICRO_EXPECT_EQ(tcl.AddInput(comp_info[0], tensors[0], 0), kTfLiteOk);
+  TF_LITE_MICRO_EXPECT_EQ(tcl.AddInput(comp_info[1], tensors[1], 1), kTfLiteOk);
+  comp_list_p = tcl.GetCompressedTensorList();
+
+#endif  // USE_TFLM_COMPRESSION
+
+  int inputs_array_data[] = {2, 0, 1};
+  TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data);
+  int outputs_array_data[] = {1, 2};
+  TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data);
+
+  TfLiteConcatenationParams builtin_data = {
+      .axis = axis,
+      .activation = kTfLiteActNone  // Only activation supported in this impl
+  };
+
+  const TFLMRegistration registration = Register_CONCATENATION();
+  micro::KernelRunner runner(
+      registration, tensors, tensors_size, inputs_array, outputs_array,
+      reinterpret_cast<void*>(&builtin_data), nullptr, comp_list_p);
+
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare());
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke());
+
+  const int output_dims_count = ElementCount(*output_dims);
+  for (int i = 0; i < output_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(expected_output_data[i], output_data[i]);
+  }
+}
+
+#endif  // USE_TFLM_COMPRESSION
+
 }  // namespace
 }  // namespace testing
 }  // namespace tflite
@@ -237,6 +335,43 @@ TF_LITE_MICRO_TEST(TwoInputsAllAxesCombinations) {
       output_shape_axis1, output_value_axis1, output_data);
 }
 
+#ifdef USE_TFLM_COMPRESSION
+
+TF_LITE_MICRO_TEST(TwoInputsFloatCompressed) {
+  int input_shape[] = {2, 2, 3};
+  const float input1_value[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
+  const float input2_value[] = {7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f};
+  // Align the tensor data the same as a Buffer in the schema
+  alignas(16) const uint8_t inputs_compressed[] = {0x05, 0x39, 0x40};
+  constexpr int kBitWidth = 3;
+
+  // expected output when concatenating on axis 0
+  int output_shape_axis0[] = {2, 4, 3};
+  const float output_value_axis0[] = {1.0f, 2.0f, 3.0f, 4.0f,  5.0f,  6.0f,
+                                      7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f};
+
+  float output_data[std::extent<decltype(output_value_axis0)>::value];
+
+  tflite::testing::TestCompressionInfo<const float> comp_info[2] = {};
+  comp_info[0].scheme = tflite::CompressionScheme::kBinQuant;
+  comp_info[0].value_table = input1_value;
+  comp_info[0].value_table_stride = std::extent<decltype(input1_value)>::value;
+  comp_info[0].bit_width = kBitWidth;
+  comp_info[1].scheme = tflite::CompressionScheme::kBinQuant;
+  comp_info[1].value_table = input2_value;
+  comp_info[1].value_table_stride = std::extent<decltype(input2_value)>::value;
+  comp_info[1].bit_width = kBitWidth;
+
+  // Axis = 0
+  tflite::testing::TestConcatenateTwoFloatInputs(
+      input_shape, reinterpret_cast<const float*>(inputs_compressed),
+      input_shape, reinterpret_cast<const float*>(inputs_compressed),
+      /* axis */ 0, output_shape_axis0, output_value_axis0, output_data,
+      &comp_info);
+}
+
+#endif  // USE_TFLM_COMPRESSION
+
 TF_LITE_MICRO_TEST(TwoInputsQuantizedInt8) {
   const int axis = 2;
   int input_shape[] = {3, 2, 1, 2};
@@ -260,6 +395,45 @@ TF_LITE_MICRO_TEST(TwoInputsQuantizedInt8) {
       output_zero_point, output_data);
 }
 
+#ifdef USE_TFLM_COMPRESSION
+
+TF_LITE_MICRO_TEST(TwoInputsQuantizedInt8Compressed) {
+  const int axis = 2;
+  int input_shape[] = {3, 2, 1, 2};
+  int output_shape[] = {3, 2, 1, 4};
+
+  const float input_scale = 0.1f;
+  const int input_zero_point = 0;
+  const float output_scale = 0.1f;
+  const int output_zero_point = 0;
+
+  const int8_t input1_values[] = {1, 2, 3, 4};
+  const int8_t input2_values[] = {5, 6, 7, 8};
+  const int8_t output_value[] = {1, 2, 5, 6, 3, 4, 7, 8};
+  // Align the tensor data the same as a Buffer in the schema
+  alignas(16) const uint8_t input_compressed[] = {0x1B};
+  constexpr int kBitWidth = 2;
+
+  int8_t output_data[std::extent<decltype(output_value)>::value];
+
+  tflite::testing::TestCompressionInfo<const int8_t> comp_info[2] = {};
+  comp_info[0].scheme = tflite::CompressionScheme::kBinQuant;
+  comp_info[0].value_table = input1_values;
+  comp_info[0].value_table_stride = std::extent<decltype(input1_values)>::value;
+  comp_info[0].bit_width = kBitWidth;
+  comp_info[1].scheme = tflite::CompressionScheme::kBinQuant;
+  comp_info[1].value_table = input2_values;
+  comp_info[1].value_table_stride = std::extent<decltype(input2_values)>::value;
+  comp_info[1].bit_width = kBitWidth;
+
+  tflite::testing::TestConcatenateQuantizedTwoInputsCompressed(
+      input_shape, input_compressed, input_shape, input_compressed, input_scale,
+      input_zero_point, axis, output_shape, output_value, output_scale,
+      output_zero_point, output_data, comp_info);
+}
+
+#endif  // USE_TFLM_COMPRESSION
+
 TF_LITE_MICRO_TEST(TwoInputsQuantizedInt16) {
   const int axis = 2;
   int input_shape[] = {3, 2, 1, 2};
@@ -283,6 +457,45 @@ TF_LITE_MICRO_TEST(TwoInputsQuantizedInt16) {
       output_zero_point, output_data);
 }
 
+#ifdef USE_TFLM_COMPRESSION
+
+TF_LITE_MICRO_TEST(TwoInputsQuantizedInt16Compressed) {
+  const int axis = 2;
+  int input_shape[] = {3, 2, 1, 2};
+  int output_shape[] = {3, 2, 1, 4};
+
+  const float input_scale = 0.1f;
+  const int input_zero_point = 0;
+  const float output_scale = 0.1f;
+  const int output_zero_point = 0;
+
+  const int16_t input1_values[] = {1, 2, 3, 4};
+  const int16_t input2_values[] = {5, 6, 7, 8};
+  const int16_t output_value[] = {1, 2, 5, 6, 3, 4, 7, 8};
+  // Align the tensor data the same as a Buffer in the schema
+  alignas(16) const uint8_t input_compressed[] = {0x1B};
+  constexpr int kBitWidth = 2;
+
+  int16_t output_data[std::extent<decltype(output_value)>::value];
+
+  tflite::testing::TestCompressionInfo<const int16_t> comp_info[2] = {};
+  comp_info[0].scheme = tflite::CompressionScheme::kBinQuant;
+  comp_info[0].value_table = input1_values;
+  comp_info[0].value_table_stride = std::extent<decltype(input1_values)>::value;
+  comp_info[0].bit_width = kBitWidth;
+  comp_info[1].scheme = tflite::CompressionScheme::kBinQuant;
+  comp_info[1].value_table = input2_values;
+  comp_info[1].value_table_stride = std::extent<decltype(input2_values)>::value;
+  comp_info[1].bit_width = kBitWidth;
+
+  tflite::testing::TestConcatenateQuantizedTwoInputsCompressed(
+      input_shape, input_compressed, input_shape, input_compressed, input_scale,
+      input_zero_point, axis, output_shape, output_value, output_scale,
+      output_zero_point, output_data, comp_info);
+}
+
+#endif  // USE_TFLM_COMPRESSION
+
 TF_LITE_MICRO_TEST(ThreeDimensionalTwoInputsDifferentShapes) {
   const int axis = 1;