From 099774d98b5fbccb12df37677b3e8130dfd252c5 Mon Sep 17 00:00:00 2001 From: Ryan Kuester Date: Mon, 16 Dec 2024 14:42:02 -0700 Subject: [PATCH] feat(compression): implement tensor decompression in op transpose conv (#3018) Implement tensor decompression in op transpose conv. Extend tests to validate operation on compressed tensors. BUG=part of #2636 --- .../lite/micro/kernels/transpose_conv.cc | 127 +++-- .../lite/micro/kernels/transpose_conv.h | 15 +- .../lite/micro/kernels/transpose_conv_test.cc | 519 +++++++++++++++++- .../micro/kernels/xtensa/transpose_conv.cc | 122 +++- 4 files changed, 705 insertions(+), 78 deletions(-) diff --git a/tensorflow/lite/micro/kernels/transpose_conv.cc b/tensorflow/lite/micro/kernels/transpose_conv.cc index ea0efae0607..715b4759dbf 100644 --- a/tensorflow/lite/micro/kernels/transpose_conv.cc +++ b/tensorflow/lite/micro/kernels/transpose_conv.cc @@ -1,4 +1,4 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -27,30 +27,26 @@ limitations under the License. #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/padding.h" #include "tensorflow/lite/micro/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/transpose_conv.h" #include "tensorflow/lite/micro/micro_log.h" namespace tflite { namespace { -// For the TfLite transpose_conv implementation, input tensor 0 corresponds to -// the OutputShapeTensor. However, since TFLM does not support dynamic tensors, -// the TFLM implementation ignores input tensor 0 and the only inputs we care -// about are kFilterTensor, kInputTensor and kBiasTensor. 
-constexpr int kFilterTensor = 1; -constexpr int kInputTensor = 2; -constexpr int kBiasTensor = 3; -constexpr int kOutputTensor = 0; - -// Conv is quantized along dimension 0: -// https://www.tensorflow.org/lite/performance/quantization_spec -constexpr int kConvQuantizedDimension = 0; - struct OpData { ConvParams params; // A scratch buffer is required for quantized implementations. int scratch_buffer_index; +#ifdef USE_TFLM_COMPRESSION + + // scratch buffers for compressed tensors + int filter_scratch_index; + int bias_scratch_index; + +#endif // USE_TFLM_COMPRESSION + // Index to the converted 64-bit bias buffer from 16-bit bias. This is // required to handle 16x8 transpose convolutions where a 16-bit bias is // provided, whereas the kernel expects 64-bit biases. @@ -102,17 +98,17 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, MicroContext* micro_context = GetMicroContext(context); TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kInputTensor); + micro_context->AllocateTempInputTensor(node, kTransposeConvInputTensor); TF_LITE_ENSURE(context, input != nullptr); - TfLiteTensor* filter = - micro_context->AllocateTempInputTensor(node, kFilterTensor); + TfLiteTensor* filter = micro_context->AllocateTempInputTensor( + node, kTransposeConvFilterTensor); TF_LITE_ENSURE(context, filter != nullptr); TfLiteTensor* bias = - micro_context->AllocateTempInputTensor(node, kBiasTensor); - TfLiteTensor* output = - micro_context->AllocateTempOutputTensor(node, kOutputTensor); + micro_context->AllocateTempInputTensor(node, kTransposeConvBiasTensor); + TfLiteTensor* output = micro_context->AllocateTempOutputTensor( + node, kTransposeConvOutputTensor); TF_LITE_ENSURE(context, output != nullptr); - int output_channels = filter->dims->data[kConvQuantizedDimension]; + int output_channels = filter->dims->data[kTransposeConvQuantizedDimension]; TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams( context, input, filter, bias, 
output, kTfLiteActNone, @@ -164,13 +160,13 @@ TfLiteStatus TransposeConvPrepare(TfLiteContext* context, TfLiteNode* node) { MicroContext* micro_context = GetMicroContext(context); TfLiteTensor* output = - micro_context->AllocateTempOutputTensor(node, kOutputTensor); + micro_context->AllocateTempOutputTensor(node, kTransposeConvOutputTensor); TF_LITE_ENSURE(context, output != nullptr); TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kInputTensor); + micro_context->AllocateTempInputTensor(node, kTransposeConvInputTensor); TF_LITE_ENSURE(context, input != nullptr); TfLiteTensor* filter = - micro_context->AllocateTempInputTensor(node, kFilterTensor); + micro_context->AllocateTempInputTensor(node, kTransposeConvFilterTensor); TF_LITE_ENSURE(context, filter != nullptr); TF_LITE_ENSURE_MSG( @@ -186,7 +182,7 @@ TfLiteStatus TransposeConvPrepare(TfLiteContext* context, TfLiteNode* node) { const int filter_height = SizeOfDimension(filter, 1); // Dynamically allocate per-channel quantization parameters. 
- const int num_channels = filter->dims->data[kConvQuantizedDimension]; + const int num_channels = filter->dims->data[kTransposeConvQuantizedDimension]; data->per_channel_output_multiplier = static_cast(context->AllocatePersistentBuffer( context, num_channels * sizeof(int32_t))); @@ -223,10 +219,10 @@ TfLiteStatus TransposeConvPrepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE(context, affine_quantization->scale); TF_LITE_ENSURE(context, affine_quantization->zero_point); - TF_LITE_ENSURE(context, - affine_quantization->scale->size == 1 || - affine_quantization->scale->size == - filter->dims->data[kConvQuantizedDimension]); + TF_LITE_ENSURE( + context, affine_quantization->scale->size == 1 || + affine_quantization->scale->size == + filter->dims->data[kTransposeConvQuantizedDimension]); TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size, affine_quantization->zero_point->size); } @@ -244,6 +240,18 @@ TfLiteStatus TransposeConvPrepare(TfLiteContext* context, TfLiteNode* node) { data->params.stride_width = params->stride_width; data->params.stride_height = params->stride_height; +#ifdef USE_TFLM_COMPRESSION + + // Compression scratch buffers. + // These will only be allocated if the tensor is compressed. 
+ data->filter_scratch_index = + micro_context->AllocateDecompressionScratchBuffer( + node, kTransposeConvFilterTensor); + data->bias_scratch_index = micro_context->AllocateDecompressionScratchBuffer( + node, kTransposeConvBiasTensor); + +#endif // USE_TFLM_COMPRESSION + micro_context->DeallocateTempTfLiteTensor(output); micro_context->DeallocateTempTfLiteTensor(input); micro_context->DeallocateTempTfLiteTensor(filter); @@ -252,15 +260,26 @@ TfLiteStatus TransposeConvPrepare(TfLiteContext* context, TfLiteNode* node) { TfLiteStatus TransposeConvEval(TfLiteContext* context, TfLiteNode* node) { const TfLiteEvalTensor* input = - tflite::micro::GetEvalInput(context, node, kInputTensor); + tflite::micro::GetEvalInput(context, node, kTransposeConvInputTensor); const TfLiteEvalTensor* filter = - tflite::micro::GetEvalInput(context, node, kFilterTensor); + tflite::micro::GetEvalInput(context, node, kTransposeConvFilterTensor); const TfLiteEvalTensor* bias = (NumInputs(node) == 4) - ? tflite::micro::GetEvalInput(context, node, kBiasTensor) + ? 
tflite::micro::GetEvalInput(context, node, kTransposeConvBiasTensor) : nullptr; TfLiteEvalTensor* output = - tflite::micro::GetEvalOutput(context, node, kOutputTensor); + tflite::micro::GetEvalOutput(context, node, kTransposeConvOutputTensor); + +#ifdef USE_TFLM_COMPRESSION + + MicroContext* micro_context = GetMicroContext(context); + + const CompressionTensorData* filter_comp_td = + micro_context->GetTensorCompressionData(node, kTransposeConvFilterTensor); + const CompressionTensorData* bias_comp_td = + micro_context->GetTensorCompressionData(node, kTransposeConvBiasTensor); + +#endif // USE_TFLM_COMPRESSION TFLITE_DCHECK(node->user_data != nullptr); const OpData& data = *(static_cast(node->user_data)); @@ -280,9 +299,17 @@ TfLiteStatus TransposeConvEval(TfLiteContext* context, TfLiteNode* node) { op_params, tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData( + micro_context, filter, filter_comp_td, data.filter_scratch_index), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData( + micro_context, bias, bias_comp_td, data.bias_scratch_index), +#else // USE_TFLM_COMPRESSION tflite::micro::GetTensorData(filter), tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), +#endif // USE_TFLM_COMPRESSION tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output), tflite::micro::GetTensorShape(nullptr), nullptr); @@ -296,9 +323,17 @@ TfLiteStatus TransposeConvEval(TfLiteContext* context, TfLiteNode* node) { data.per_channel_output_shift, tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData( + micro_context, filter, filter_comp_td, data.filter_scratch_index), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData( + micro_context, bias, 
bias_comp_td, data.bias_scratch_index), +#else // USE_TFLM_COMPRESSION tflite::micro::GetTensorData(filter), tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), +#endif // USE_TFLM_COMPRESSION tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output), tflite::micro::GetTensorShape(nullptr), nullptr, scratch_buffer); @@ -311,16 +346,29 @@ TfLiteStatus TransposeConvEval(TfLiteContext* context, TfLiteNode* node) { auto* bias_converted_buffer = static_cast(context->GetScratchBuffer( context, data.bias_converted_buffer_index)); + const int16_t* const bias_int16_data = +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData( + micro_context, bias, bias_comp_td, data.bias_scratch_index); +#else // USE_TFLM_COMPRESSION + static_cast(bias->data.data); +#endif // USE_TFLM_COMPRESSION for (int i = 0; i < tflite::micro::GetTensorShape(bias).FlatSize(); i++) { - bias_converted_buffer[i] = bias->data.i16[i]; + bias_converted_buffer[i] = bias_int16_data[i]; } reference_integer_ops::TransposeConv( data.params, data.per_channel_output_multiplier, data.per_channel_output_shift, tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(micro_context, filter, + filter_comp_td, + data.filter_scratch_index), +#else // USE_TFLM_COMPRESSION tflite::micro::GetTensorData(filter), +#endif // USE_TFLM_COMPRESSION tflite::micro::GetTensorShape(bias), bias_converted_buffer, tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output), @@ -331,9 +379,18 @@ TfLiteStatus TransposeConvEval(TfLiteContext* context, TfLiteNode* node) { data.per_channel_output_shift, tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(micro_context, filter, + filter_comp_td, + data.filter_scratch_index), + 
tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData( + micro_context, bias, bias_comp_td, data.bias_scratch_index), +#else // USE_TFLM_COMPRESSION tflite::micro::GetTensorData(filter), tflite::micro::GetTensorShape(bias), - tflite::micro::GetOptionalTensorData(bias), + tflite::micro::GetOptionalTensorData(bias), +#endif // USE_TFLM_COMPRESSION tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output), tflite::micro::GetTensorShape(nullptr), nullptr, scratch_buffer); diff --git a/tensorflow/lite/micro/kernels/transpose_conv.h b/tensorflow/lite/micro/kernels/transpose_conv.h index 3a99ccbf847..ec0416e067f 100644 --- a/tensorflow/lite/micro/kernels/transpose_conv.h +++ b/tensorflow/lite/micro/kernels/transpose_conv.h @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -23,6 +23,19 @@ limitations under the License. namespace tflite { +// For the TfLite transpose_conv implementation, input tensor 0 corresponds to +// the OutputShapeTensor. However, since TFLM does not support dynamic tensors, +// the TFLM implementation ignores input tensor 0 and the only inputs we care +// about are kFilterTensor, kInputTensor and kBiasTensor. +constexpr int kTransposeConvFilterTensor = 1; +constexpr int kTransposeConvInputTensor = 2; +constexpr int kTransposeConvBiasTensor = 3; +constexpr int kTransposeConvOutputTensor = 0; + +// Conv is quantized along dimension 0: +// https://www.tensorflow.org/lite/performance/quantization_spec +constexpr int kTransposeConvQuantizedDimension = 0; + // This is the most generic TFLMRegistration. The actual supported types // may still be target dependent. The only requirement is that every // implementation (reference or optimized) must define this function. 
diff --git a/tensorflow/lite/micro/kernels/transpose_conv_test.cc b/tensorflow/lite/micro/kernels/transpose_conv_test.cc index 49d2c90f439..a4c78d91e80 100644 --- a/tensorflow/lite/micro/kernels/transpose_conv_test.cc +++ b/tensorflow/lite/micro/kernels/transpose_conv_test.cc @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,9 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include "tensorflow/lite/micro/kernels/transpose_conv.h" + +#include + #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/micro/kernels/conv_test.h" #include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/micro_utils.h" #include "tensorflow/lite/micro/test_helpers.h" @@ -47,20 +50,127 @@ static const float kGoldenData[kOutputElements] = { 184, 412, 568, 528, 678, 1347, 1689, 1434, 1494, 2715, 3057, 2442, 1968, 3352, 3652, 2760}; +#ifdef USE_TFLM_COMPRESSION + +constexpr size_t kTransposeConvMaxTensors = 5; +constexpr size_t kTransposeConvMaxInputTensors = 4; + +// compressed filter data for kBinQuant scheme, matches kFilterData +// Align the tensor data the same as a Buffer in the schema +alignas(16) constexpr uint8_t kBinQuantFilterData[] = { + 0x00, 0x44, 0x32, 0x14, 0xC7, 0x42, 0x54, 0xB6, 0x35, 0xCF, 0x84, 0x40}; +constexpr int kBinQuantFilterBitWidth = 5; +// compressed bias data for kBinQuant scheme, matches kBiasData +// Align the tensor data the same as a Buffer in the schema +alignas(16) constexpr uint8_t kBinQuantBiasData[] = {0x00}; +constexpr int kBinQuantBiasBitWidth = 1; + +// Common inputs and outputs (quantized single 
channel). +// data from TfLite test: SimpleBiasTestQuantizedPerChannelSingleChannel +static int kInputShapeQ1[] = {4, 1, 4, 4, 1}; +static constexpr float kInputDataQ1[] = {1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16}; +constexpr size_t kInputElementsQ1 = std::extent::value; + +constexpr int kNumChannelsQ1 = 1; +static int kFilterShapeQ1[] = {4, 1, 3, 3, 1}; +static constexpr float kFilterDataQ1[] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; +constexpr size_t kFilterElementsQ1 = + std::extent::value; + +static int kBiasShapeQ1[] = {1, 1}; +static constexpr float kBiasDataQ1[] = {1}; +constexpr size_t kBiasElementsQ1 = std::extent::value; + +static int kOutputShapeQ1[] = {4, 1, 4, 4, 1}; +static constexpr float kGoldenDataQ1[] = { + 30, 62, 84, 76, 100, 194, 238, 200, 208, 372, 418, 330, 264, 446, 486, 366}; +constexpr int kOutputElementsQ1 = std::extent::value; + +// compressed filter data for kBinQuant scheme, matches kFilterDataQ1 +// Align the tensor data the same as a Buffer in the schema +alignas(16) constexpr uint8_t kBinQuantFilterDataQ1[] = {0x01, 0x23, 0x45, 0x67, + 0x80}; +constexpr int kBinQuantFilterBitWidthQ1 = 4; +// compressed bias data for kBinQuant scheme, matches kBiasDataQ1 +// Align the tensor data the same as a Buffer in the schema +alignas(16) constexpr uint8_t kBinQuantBiasDataQ1[] = {0x00}; +constexpr int kBinQuantBiasBitWidthQ1 = 1; + +// Common inputs and outputs (quantized multi channel). 
+// data from TfLite test: SimpleBiasTestQuantizedPerChannel16x8Bias64 +static int kInputShapeQ2[] = {4, 1, 2, 3, 2}; +static constexpr float kInputDataQ2[] = { + // [1 * 2 * 3 * 2] as [batch, y, x, input_channel] + 3, 2, // batch = 0, y = 0, x = 0 + 1, -1, // batch = 0, y = 0, x = 1 + -2, -3, // batch = 0, y = 0, x = 2 + 4, 3, // batch = 0, y = 1, x = 0 + 2, -2, // batch = 0, y = 1, x = 1 + -3, -4, // batch = 0, y = 1, x = 2 +}; +constexpr size_t kInputElementsQ2 = std::extent::value; + +constexpr int kNumChannelsQ2 = 2; +static int kFilterShapeQ2[] = {4, 2, 2, 2, 2}; +// Original filter data: +// static constexpr float kFilterDataQ2[] = { +// // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel] +// 1, 2, // out channel = 0, y = 0, x = 0 +// 3, 4, // out channel = 0, y = 0, x = 1 +// 3, 4, // out channel = 0, y = 1, x = 0 +// 5, 6, // out channel = 0, y = 1, x = 1 +// 7, 8, // out channel = 1, y = 0, x = 0 +// 5, 6, // out channel = 1, y = 0, x = 1 +// 3, 4, // out channel = 1, y = 1, x = 0 +// 1, 2, // out channel = 1, y = 1, x = 1 +// }; + +static int kBiasShapeQ2[] = {1, 2}; +static constexpr float kBiasDataQ2[] = {3, -2}; +constexpr size_t kBiasElementsQ2 = std::extent::value; + +static int kOutputShapeQ2[] = {4, 1, 2, 3, 2}; +static constexpr float kGoldenDataQ2[] = {10, 35, 19, 24, -6, -41, + 30, 64, 51, 40, -29, -64}; +constexpr int kOutputElementsQ2 = std::extent::value; + +// compressed filter data for kBinQuant scheme, matches kFilterDataQ2 +// Align the tensor data the same as a Buffer in the schema +alignas(16) constexpr uint8_t kBinQuantFilterDataQ2[] = {0x05, 0x34, 0xE5, + 0xDE, 0x54, 0xC1}; +constexpr float kBinQuantFilterValueTableQ2[] = {1, 2, 3, 4, 5, 6, 0, 0, + 1, 2, 3, 4, 5, 6, 7, 8}; +constexpr size_t kBinQuantFilterValueTableElementsQ2 = + std::extent::value; +constexpr int kBinQuantFilterBitWidthQ2 = 3; +// compressed bias data for kBinQuant scheme, matches kBiasDataQ2 +// Align the tensor data the same as a Buffer in the schema 
+alignas(16) constexpr uint8_t kBinQuantBiasDataQ2[] = {0x00}; +constexpr int kBinQuantBiasBitWidthQ2 = 1; + +#endif // USE_TFLM_COMPRESSION + // Transpose conv uses TfLiteConvParams. -static TfLiteConvParams common_conv_params = {kTfLitePaddingSame, // padding - 1, // stride_width - 1, // stride_height - kTfLiteActNone, - 1, - 1, - kTfLiteNoType}; +static const TfLiteConvParams common_conv_params = { + kTfLitePaddingSame, // padding + 1, // stride_width + 1, // stride_height + kTfLiteActNone, + 1, + 1, + kTfLiteNoType}; template -TfLiteStatus InvokeTransposeConv(TfLiteTensor* tensors, int tensors_size, - int output_length, - TfLiteConvParams* conv_params, - T* output_data) { +TfLiteStatus InvokeTransposeConv( + TfLiteTensor* tensors, int tensors_size, int output_length, + const TfLiteConvParams* conv_params, T* output_data +#ifdef USE_TFLM_COMPRESSION + , + const CompressedTensorList* comp_list_p = nullptr +#endif // USE_TFLM_COMPRESSION +) { + // TODO(b/358151309): support optional bias tensor int inputs_array_data[] = {4, 0, 1, 2, 3}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 4}; @@ -68,7 +178,12 @@ TfLiteStatus InvokeTransposeConv(TfLiteTensor* tensors, int tensors_size, const TFLMRegistration registration = tflite::Register_TRANSPOSE_CONV(); micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, - outputs_array, conv_params); + outputs_array, conv_params +#ifdef USE_TFLM_COMPRESSION + , + nullptr, comp_list_p +#endif // USE_TFLM_COMPRESSION + ); const char* init_data = reinterpret_cast(conv_params); TfLiteStatus status = runner.InitAndPrepare(init_data); @@ -78,15 +193,45 @@ TfLiteStatus InvokeTransposeConv(TfLiteTensor* tensors, int tensors_size, return runner.Invoke(); } -template -TfLiteStatus ValidateTransposeConvGoldens(TfLiteTensor* tensors, - int tensors_size, - const T* expected_output_data, - int output_length, - TfLiteConvParams* conv_params, - T* output_data, float 
tolerance) { +template +TfLiteStatus ValidateTransposeConvGoldens( + TfLiteTensor* tensors, int tensors_size, const T* expected_output_data, + int output_length, const TfLiteConvParams* conv_params, T* output_data, + float tolerance = 1e-5f +#ifdef USE_TFLM_COMPRESSION + , + const TestCompressionInfo* filter_comp_info = nullptr, + const TestCompressionInfo* bias_comp_info = nullptr +#endif // USE_TFLM_COMPRESSION +) { +#ifdef USE_TFLM_COMPRESSION + + TestCompressedList tcl; + if (filter_comp_info != nullptr) { + TF_LITE_MICRO_EXPECT_EQ( + tcl.AddInput(*filter_comp_info, tensors[kTransposeConvFilterTensor], + kTransposeConvFilterTensor), + kTfLiteOk); + TF_LITE_MICRO_CHECK_FAIL(); + } + if (bias_comp_info != nullptr) { + TF_LITE_MICRO_EXPECT_EQ( + tcl.AddInput(*bias_comp_info, tensors[kTransposeConvBiasTensor], + kTransposeConvBiasTensor), + kTfLiteOk); + TF_LITE_MICRO_CHECK_FAIL(); + } + const CompressedTensorList* comp_list_p = tcl.GetCompressedTensorList(); + +#endif // USE_TFLM_COMPRESSION + TfLiteStatus status = InvokeTransposeConv( - tensors, tensors_size, output_length, conv_params, output_data); + tensors, tensors_size, output_length, conv_params, output_data +#ifdef USE_TFLM_COMPRESSION + , + comp_list_p +#endif // USE_TFLM_COMPRESSION + ); if (status != kTfLiteOk) { return status; } @@ -101,7 +246,13 @@ TfLiteStatus TestTransposeConvFloat( int* input_dims_data, const float* input_data, int* filter_dims_data, const float* filter_data, int* bias_dims_data, const float* bias_data, int* output_dims_data, const float* expected_output_data, - TfLiteConvParams* conv_params, float* output_data) { + const TfLiteConvParams* conv_params, float* output_data +#ifdef USE_TFLM_COMPRESSION + , + const TestCompressionInfo* filter_comp_info = nullptr, + const TestCompressionInfo* bias_comp_info = nullptr +#endif // USE_TFLM_COMPRESSION +) { TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data); 
TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); @@ -125,7 +276,12 @@ TfLiteStatus TestTransposeConvFloat( return ValidateTransposeConvGoldens(tensors, tensors_size, expected_output_data, output_dims_count, - conv_params, output_data, 0.001f); + conv_params, output_data +#ifdef USE_TFLM_COMPRESSION + , + 1e-5, filter_comp_info, bias_comp_info +#endif // USE_TFLM_COMPRESSION + ); } TfLiteStatus TestTransposeConvQuantized( @@ -135,8 +291,8 @@ TfLiteStatus TestTransposeConvQuantized( int* bias_dims_data, const float* bias_data, int32_t* bias_quantized, float* bias_scales, int* bias_zero_points, int* output_dims_data, const float* expected_output_data, int8_t* expected_output_quantized, - float output_scale, int output_zero_point, TfLiteConvParams* conv_params, - int8_t* output_data) { + float output_scale, int output_zero_point, + const TfLiteConvParams* conv_params, int8_t* output_data) { TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data); TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); @@ -181,8 +337,8 @@ TfLiteStatus TestTransposeConvQuantized( int* bias_dims_data, const float* bias_data, T* bias_quantized, float* bias_scales, int* bias_zero_points, int* output_dims_data, const float* expected_output_data, int16_t* expected_output_quantized, - float output_scale, int output_zero_point, TfLiteConvParams* conv_params, - int16_t* output_data) { + float output_scale, int output_zero_point, + const TfLiteConvParams* conv_params, int16_t* output_data) { TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data); TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); @@ -221,6 +377,80 @@ TfLiteStatus TestTransposeConvQuantized( conv_params, output_data, 4.0f); } +#ifdef USE_TFLM_COMPRESSION + +template +TfLiteStatus TestTransposeConvQuantizedCompressed( + int* input_dims_data, const float* 
input_data, TIO* input_quantized, + float input_scale, int input_zero_point, int* output_dims_data, + const float* expected_output_data, TIO* expected_output_quantized, + TIO* output_quantized, float output_scale, int output_zero_point, + const TfLiteConvParams* conv_params, const unsigned int tolerance, + const TestCompressionQuantizedInfo* filter_comp_info, + const TestCompressionQuantizedInfo* bias_comp_info) { + // TODO(b/358151309): account for optional bias tensor + // bool null_bias = comp_info->bias_data == nullptr ? true : false; + + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* filter_dims = IntArrayFromInts(filter_comp_info->dims_data); + TfLiteIntArray* bias_dims = IntArrayFromInts(bias_comp_info->dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); + + TfLiteFloatArray* filter_scales = + FloatArrayFromFloats(filter_comp_info->scales); + TfLiteIntArray* filter_zero_points = + IntArrayFromInts(filter_comp_info->zero_points); + TfLiteFloatArray* bias_scales = FloatArrayFromFloats(bias_comp_info->scales); + TfLiteIntArray* bias_zero_points = + IntArrayFromInts(bias_comp_info->zero_points); + + TfLiteAffineQuantization filter_quant = {}; + TfLiteTensor filter_tensor = CreatePerChannelQuantizedTensor( + filter_comp_info->compressed, filter_dims, filter_scales, + filter_zero_points, &filter_quant, kTransposeConvQuantizedDimension, + false /* is_variable */, kTfLiteInt8); + SymmetricPerChannelQuantize( + filter_comp_info->data, filter_comp_info->value_table, + filter_scales->size * filter_comp_info->value_table_stride, + filter_scales->size, filter_scales->data); + + TfLiteAffineQuantization bias_quant = {}; + TfLiteTensor bias_tensor = CreatePerChannelQuantizedBiasTensor( + bias_comp_info->compressed, bias_dims, input_scale, filter_scales, + bias_scales, bias_zero_points, &bias_quant, + kTransposeConvQuantizedDimension, false /* is_variable */, + typeToTfLiteType()); + SymmetricPerChannelQuantize( 
+ bias_comp_info->data, bias_comp_info->value_table, + bias_scales->size * bias_comp_info->value_table_stride, bias_scales->size, + bias_scales->data); + + int output_shape_dims_data[] = {1, 0}; + int32_t* output_shape = nullptr; + TfLiteIntArray* output_shape_dims = IntArrayFromInts(output_shape_dims_data); + + constexpr int tensors_size = kTransposeConvMaxTensors; + TfLiteTensor tensors[tensors_size] = { + CreateTensor(output_shape, output_shape_dims), + filter_tensor, + CreateQuantizedTensor(input_data, input_quantized, input_dims, + input_scale, input_zero_point), + bias_tensor, + CreateQuantizedTensor(output_quantized, output_dims, output_scale, + output_zero_point), + }; + + const int output_dims_count = ElementCount(*output_dims); + Quantize(expected_output_data, expected_output_quantized, output_dims_count, + output_scale, output_zero_point); + return ValidateTransposeConvGoldens( + tensors, tensors_size, expected_output_quantized, output_dims_count, + conv_params, output_quantized, tolerance, filter_comp_info, + bias_comp_info); +} + +#endif // USE_TFLM_COMPRESSION + } // namespace } // namespace testing } // namespace tflite @@ -240,6 +470,41 @@ TF_LITE_MICRO_TEST(SimpleTestFloat) { &tflite::testing::common_conv_params, output_data)); } +#ifdef USE_TFLM_COMPRESSION + +TF_LITE_MICRO_TEST(SimpleTestFloatCompressed) { + tflite::testing::TestCompressionInfo filter_comp_info = {}; + tflite::testing::TestCompressionInfo bias_comp_info = {}; + + filter_comp_info.scheme = tflite::CompressionScheme::kBinQuant; + filter_comp_info.value_table = tflite::testing::kFilterData; + filter_comp_info.value_table_stride = + std::extent::value; + filter_comp_info.bit_width = tflite::testing::kBinQuantFilterBitWidth; + + bias_comp_info.scheme = tflite::CompressionScheme::kBinQuant; + bias_comp_info.value_table = tflite::testing::kBiasData; + bias_comp_info.value_table_stride = + std::extent::value; + bias_comp_info.bit_width = tflite::testing::kBinQuantBiasBitWidth; + + float 
output_data[tflite::testing::kOutputElements]; + + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, + tflite::testing::TestTransposeConvFloat( + tflite::testing::kInputShape, tflite::testing::kInputData, + tflite::testing::kFilterShape, + reinterpret_cast(tflite::testing::kBinQuantFilterData), + tflite::testing::kBiasShape, + reinterpret_cast(tflite::testing::kBinQuantBiasData), + tflite::testing::kOutputShape, tflite::testing::kGoldenData, + &tflite::testing::common_conv_params, output_data, &filter_comp_info, + &bias_comp_info)); +} + +#endif // USE_TFLM_COMPRESSION + TF_LITE_MICRO_TEST(fusedRELUTest) { float output_data[tflite::testing::kOutputElements]; float golden_data[] = {29, 24, 0, 0, 99, 72, 0, 0, @@ -476,4 +741,202 @@ TF_LITE_MICRO_TEST(HybridModeIsError) { &tflite::testing::common_conv_params, output_data)); } +#ifdef USE_TFLM_COMPRESSION + +TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannelSingleChannelCompressed) { + // data from TfLite test: SimpleBiasTestQuantizedPerChannelSingleChannel + const float input_scale = 16.0f / 255.0f; + const float output_scale = 2.0f; + const int input_zero_point = -128; + const int output_zero_point = -128; + constexpr float filter_scales[] = { + tflite::testing::kNumChannelsQ1, + 9.0f / 127.0f, + }; + constexpr int filter_zero_points[] = { + tflite::testing::kNumChannelsQ1, + 0, + }; + // bias scales and zero points will be computed + float bias_scales[std::extent::value] = {}; + int bias_zero_points[std::extent::value] = {}; + + int8_t input_quantized[tflite::testing::kInputElementsQ1]; + int8_t filter_quantized[tflite::testing::kFilterElementsQ1]; + int32_t bias_quantized[tflite::testing::kBiasElementsQ1]; + int8_t golden_quantized[tflite::testing::kOutputElementsQ1]; + int8_t output_quantized[tflite::testing::kOutputElementsQ1]; + + tflite::testing::TestCompressionQuantizedInfo filter_comp_info = {}; + tflite::testing::TestCompressionQuantizedInfo bias_comp_info = {}; + + filter_comp_info.scheme = 
tflite::CompressionScheme::kBinQuant; + filter_comp_info.value_table = filter_quantized; + filter_comp_info.value_table_stride = + tflite::testing::kFilterElementsQ1 / tflite::testing::kNumChannelsQ1; + filter_comp_info.bit_width = tflite::testing::kBinQuantFilterBitWidthQ1; + filter_comp_info.compressed = tflite::testing::kBinQuantFilterDataQ1; + filter_comp_info.data = tflite::testing::kFilterDataQ1; + filter_comp_info.dims_data = tflite::testing::kFilterShapeQ1; + filter_comp_info.scales = filter_scales; + filter_comp_info.zero_points = filter_zero_points; + + bias_comp_info.scheme = tflite::CompressionScheme::kBinQuant; + bias_comp_info.value_table = bias_quantized; + bias_comp_info.value_table_stride = + tflite::testing::kBiasElementsQ1 / tflite::testing::kNumChannelsQ1; + bias_comp_info.bit_width = tflite::testing::kBinQuantBiasBitWidthQ1; + bias_comp_info.compressed = tflite::testing::kBinQuantBiasDataQ1; + bias_comp_info.data = tflite::testing::kBiasDataQ1; + bias_comp_info.dims_data = tflite::testing::kBiasShapeQ1; + bias_comp_info.scales = bias_scales; + bias_comp_info.zero_points = bias_zero_points; + + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, + tflite::testing::TestTransposeConvQuantizedCompressed( + tflite::testing::kInputShapeQ1, tflite::testing::kInputDataQ1, + input_quantized, input_scale, input_zero_point, + tflite::testing::kOutputShapeQ1, tflite::testing::kGoldenDataQ1, + golden_quantized, output_quantized, output_scale, output_zero_point, + &tflite::testing::common_conv_params, 0, &filter_comp_info, + &bias_comp_info)); +} + +TF_LITE_MICRO_TEST( + SimpleBiasTestQuantizedPerChannelBias16MultiChannelCompressed) { + // data from TfLite test: SimpleBiasTestQuantizedPerChannel16x8Bias64 + const float input_scale = 4.0f / 127.0f; + const float output_scale = 128.0f / 65536.0f; + const int input_zero_point = 0; + const int output_zero_point = 0; + constexpr float filter_scales[] = { + tflite::testing::kNumChannelsQ2, + 7.0f / 127.0f, + 8.0f / 127.0f, + }; 
+ constexpr int filter_zero_points[] = { + tflite::testing::kNumChannelsQ2, + 0, + 0, + }; + // bias scales and zero points will be computed (sized like filter_scales) + float bias_scales[std::extent<decltype(filter_scales)>::value] = {}; + int bias_zero_points[std::extent<decltype(filter_scales)>::value] = {}; + + int16_t input_quantized[tflite::testing::kInputElementsQ2]; + int8_t filter_quantized[tflite::testing::kBinQuantFilterValueTableElementsQ2]; + int16_t bias_quantized[tflite::testing::kBiasElementsQ2]; + int16_t golden_quantized[tflite::testing::kOutputElementsQ2]; + int16_t output_quantized[tflite::testing::kOutputElementsQ2]; + + tflite::testing::TestCompressionQuantizedInfo<int8_t> filter_comp_info = {}; + tflite::testing::TestCompressionQuantizedInfo<int16_t> bias_comp_info = {}; + + filter_comp_info.scheme = tflite::CompressionScheme::kBinQuant; + filter_comp_info.value_table = filter_quantized; + filter_comp_info.value_table_stride = + tflite::testing::kBinQuantFilterValueTableElementsQ2 / + tflite::testing::kNumChannelsQ2; + filter_comp_info.bit_width = tflite::testing::kBinQuantFilterBitWidthQ2; + filter_comp_info.compressed = tflite::testing::kBinQuantFilterDataQ2; + filter_comp_info.data = tflite::testing::kBinQuantFilterValueTableQ2; + filter_comp_info.dims_data = tflite::testing::kFilterShapeQ2; + filter_comp_info.scales = filter_scales; + filter_comp_info.zero_points = filter_zero_points; + + bias_comp_info.scheme = tflite::CompressionScheme::kBinQuant; + bias_comp_info.value_table = bias_quantized; + bias_comp_info.value_table_stride = + tflite::testing::kBiasElementsQ2 / tflite::testing::kNumChannelsQ2; + bias_comp_info.bit_width = tflite::testing::kBinQuantBiasBitWidthQ2; + bias_comp_info.compressed = tflite::testing::kBinQuantBiasDataQ2; + bias_comp_info.data = tflite::testing::kBiasDataQ2; + bias_comp_info.dims_data = tflite::testing::kBiasShapeQ2; + bias_comp_info.scales = bias_scales; + bias_comp_info.zero_points = bias_zero_points; + + // The quantized output is compared to the expected output (quantized).
+ // A tolerance of 81 is approx. 0.1582f which is less than the TfLite + // tolerance of 0.19f. + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, + tflite::testing::TestTransposeConvQuantizedCompressed( + tflite::testing::kInputShapeQ2, tflite::testing::kInputDataQ2, + input_quantized, input_scale, input_zero_point, + tflite::testing::kOutputShapeQ2, tflite::testing::kGoldenDataQ2, + golden_quantized, output_quantized, output_scale, output_zero_point, + &tflite::testing::common_conv_params, 81, &filter_comp_info, + &bias_comp_info)); +} + +TF_LITE_MICRO_TEST( + SimpleBiasTestQuantizedPerChannelBias64MultiChannelCompressed) { + // data from TfLite test: SimpleBiasTestQuantizedPerChannel16x8Bias64 + const float input_scale = 4.0f / 127.0f; + const float output_scale = 128.0f / 65536.0f; + const int input_zero_point = 0; + const int output_zero_point = 0; + constexpr float filter_scales[] = { + tflite::testing::kNumChannelsQ2, + 7.0f / 127.0f, + 8.0f / 127.0f, + }; + constexpr int filter_zero_points[] = { + tflite::testing::kNumChannelsQ2, + 0, + 0, + }; + // bias scales and zero points will be computed (sized like filter_scales) + float bias_scales[std::extent<decltype(filter_scales)>::value] = {}; + int bias_zero_points[std::extent<decltype(filter_scales)>::value] = {}; + + int16_t input_quantized[tflite::testing::kInputElementsQ2]; + int8_t filter_quantized[tflite::testing::kBinQuantFilterValueTableElementsQ2]; + int64_t bias_quantized[tflite::testing::kBiasElementsQ2]; + int16_t golden_quantized[tflite::testing::kOutputElementsQ2]; + int16_t output_quantized[tflite::testing::kOutputElementsQ2]; + + tflite::testing::TestCompressionQuantizedInfo<int8_t> filter_comp_info = {}; + tflite::testing::TestCompressionQuantizedInfo<int64_t> bias_comp_info = {}; + + filter_comp_info.scheme = tflite::CompressionScheme::kBinQuant; + filter_comp_info.value_table = filter_quantized; + filter_comp_info.value_table_stride = + tflite::testing::kBinQuantFilterValueTableElementsQ2 / + tflite::testing::kNumChannelsQ2; + filter_comp_info.bit_width =
tflite::testing::kBinQuantFilterBitWidthQ2; + filter_comp_info.compressed = tflite::testing::kBinQuantFilterDataQ2; + filter_comp_info.data = tflite::testing::kBinQuantFilterValueTableQ2; + filter_comp_info.dims_data = tflite::testing::kFilterShapeQ2; + filter_comp_info.scales = filter_scales; + filter_comp_info.zero_points = filter_zero_points; + + bias_comp_info.scheme = tflite::CompressionScheme::kBinQuant; + bias_comp_info.value_table = bias_quantized; + bias_comp_info.value_table_stride = + tflite::testing::kBiasElementsQ2 / tflite::testing::kNumChannelsQ2; + bias_comp_info.bit_width = tflite::testing::kBinQuantBiasBitWidthQ2; + bias_comp_info.compressed = tflite::testing::kBinQuantBiasDataQ2; + bias_comp_info.data = tflite::testing::kBiasDataQ2; + bias_comp_info.dims_data = tflite::testing::kBiasShapeQ2; + bias_comp_info.scales = bias_scales; + bias_comp_info.zero_points = bias_zero_points; + + // The quantized output is compared to the expected output (quantized). + // A tolerance of 81 is approx. 0.1582f which is less than the TfLite + // tolerance of 0.19f. + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, + tflite::testing::TestTransposeConvQuantizedCompressed( + tflite::testing::kInputShapeQ2, tflite::testing::kInputDataQ2, + input_quantized, input_scale, input_zero_point, + tflite::testing::kOutputShapeQ2, tflite::testing::kGoldenDataQ2, + golden_quantized, output_quantized, output_scale, output_zero_point, + &tflite::testing::common_conv_params, 81, &filter_comp_info, + &bias_comp_info)); +} + +#endif // USE_TFLM_COMPRESSION + TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/kernels/xtensa/transpose_conv.cc b/tensorflow/lite/micro/kernels/xtensa/transpose_conv.cc index 44a9f86049c..ba08a99f1b6 100644 --- a/tensorflow/lite/micro/kernels/xtensa/transpose_conv.cc +++ b/tensorflow/lite/micro/kernels/xtensa/transpose_conv.cc @@ -1,4 +1,4 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. 
All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -49,6 +49,14 @@ struct OpData { // A scratch buffer is required for quantized implementations. int scratch_buffer_index; +#ifdef USE_TFLM_COMPRESSION + + // scratch buffers for compressed tensors + int filter_scratch_index; + int bias_scratch_index; + +#endif // USE_TFLM_COMPRESSION + // TODO(b/192090531): Remove this once all 8x16 transpose conv models use // 64-bit biases. int bias_converted_buffer_index; @@ -268,6 +276,17 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { data->params.stride_width = params->stride_width; data->params.stride_height = params->stride_height; +#ifdef USE_TFLM_COMPRESSION + + // Compression scratch buffers. + // These will only be allocated if the tensor is compressed. + data->filter_scratch_index = + micro_context->AllocateDecompressionScratchBuffer(node, kFilterTensor); + data->bias_scratch_index = + micro_context->AllocateDecompressionScratchBuffer(node, kBiasTensor); + +#endif // USE_TFLM_COMPRESSION + micro_context->DeallocateTempTfLiteTensor(output); micro_context->DeallocateTempTfLiteTensor(input); micro_context->DeallocateTempTfLiteTensor(filter); @@ -286,6 +305,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, kOutputTensor); +#ifdef USE_TFLM_COMPRESSION + + MicroContext* micro_context = GetMicroContext(context); + + const CompressionTensorData* filter_comp_td = + micro_context->GetTensorCompressionData(node, kFilterTensor); + const CompressionTensorData* bias_comp_td = + micro_context->GetTensorCompressionData(node, kBiasTensor); + +#endif // USE_TFLM_COMPRESSION + TFLITE_DCHECK(node->user_data != nullptr); const OpData& data = *(static_cast(node->user_data)); @@ -309,9 +339,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { op_params, 
tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData( + micro_context, filter, filter_comp_td, data.filter_scratch_index), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData( + micro_context, bias, bias_comp_td, data.bias_scratch_index), +#else // USE_TFLM_COMPRESSION tflite::micro::GetTensorData(filter), tflite::micro::GetTensorShape(bias), - tflite::micro::GetTensorData(bias), + tflite::micro::GetOptionalTensorData(bias), +#endif // USE_TFLM_COMPRESSION tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output), tflite::micro::GetTensorShape(nullptr), nullptr); @@ -321,7 +359,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { int32_t* scratch_buffer = static_cast( context->GetScratchBuffer(context, data.scratch_buffer_index)); #if defined(HIFI3) || defined(HIFI4) || defined(HIFI5) - if (bias->type == kTfLiteInt32) { + if (bias != nullptr && bias->type == kTfLiteInt32) { const RuntimeShape& input_shape = tflite::micro::GetTensorShape(input); const RuntimeShape& filter_shape = tflite::micro::GetTensorShape(filter); @@ -343,9 +381,16 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const int output_height = output_shape.Dims(1); const int output_width = output_shape.Dims(2); const int8_t* input_data = tflite::micro::GetTensorData(input); +#ifdef USE_TFLM_COMPRESSION + const int8_t* filter_data = tflite::micro::GetTensorData( + micro_context, filter, filter_comp_td, data.filter_scratch_index); + const int32_t* bias_data = tflite::micro::GetTensorData( + micro_context, bias, bias_comp_td, data.bias_scratch_index); +#else // USE_TFLM_COMPRESSION const int8_t* filter_data = tflite::micro::GetTensorData(filter); const int32_t* bias_data = tflite::micro::GetTensorData(bias); +#endif // USE_TFLM_COMPRESSION int8_t* output_data = tflite::micro::GetTensorData(output); const int 
num_elements = output_shape.FlatSize(); @@ -369,9 +414,18 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { data.per_channel_output_shift, tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(micro_context, filter, + filter_comp_td, + data.filter_scratch_index), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData( + micro_context, bias, bias_comp_td, data.bias_scratch_index), +#else // USE_TFLM_COMPRESSION tflite::micro::GetTensorData(filter), tflite::micro::GetTensorShape(bias), - tflite::micro::GetTensorData(bias), + tflite::micro::GetOptionalTensorData(bias), +#endif // USE_TFLM_COMPRESSION tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output), tflite::micro::GetTensorShape(nullptr), nullptr, scratch_buffer); @@ -382,9 +436,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { data.per_channel_output_shift, tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData( + micro_context, filter, filter_comp_td, data.filter_scratch_index), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData( + micro_context, bias, bias_comp_td, data.bias_scratch_index), +#else // USE_TFLM_COMPRESSION tflite::micro::GetTensorData(filter), tflite::micro::GetTensorShape(bias), - tflite::micro::GetTensorData(bias), + tflite::micro::GetOptionalTensorData(bias), +#endif // USE_TFLM_COMPRESSION tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output), tflite::micro::GetTensorShape(nullptr), nullptr, scratch_buffer); @@ -396,20 +458,36 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { context->GetScratchBuffer(context, data.scratch_buffer_index)); // TODO(b/192090531): Remove this once all 8x16 transpose conv models use // 
64-bit biases. - if (bias->type == kTfLiteInt16) { - std::int64_t* bias_converted_buffer = - static_cast(context->GetScratchBuffer( - context, data.bias_converted_buffer_index)); - for (int i = 0; i < tflite::micro::GetTensorShape(bias).FlatSize(); - i++) { - bias_converted_buffer[i] = bias->data.i16[i]; + if (bias == nullptr || bias->type == kTfLiteInt16) { + std::int64_t* bias_converted_buffer = nullptr; + if (bias != nullptr) { + bias_converted_buffer = + static_cast(context->GetScratchBuffer( + context, data.bias_converted_buffer_index)); + const int16_t* const bias_int16_data = +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData( + micro_context, bias, bias_comp_td, data.bias_scratch_index); +#else // USE_TFLM_COMPRESSION + static_cast(bias->data.data); +#endif // USE_TFLM_COMPRESSION + for (int i = 0; i < tflite::micro::GetTensorShape(bias).FlatSize(); + i++) { + bias_converted_buffer[i] = bias_int16_data[i]; + } } reference_integer_ops::TransposeConv( data.params, data.per_channel_output_multiplier, data.per_channel_output_shift, tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(micro_context, filter, + filter_comp_td, + data.filter_scratch_index), +#else // USE_TFLM_COMPRESSION tflite::micro::GetTensorData(filter), +#endif // USE_TFLM_COMPRESSION tflite::micro::GetTensorShape(bias), bias_converted_buffer, tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output), @@ -438,9 +516,16 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const int output_width = output_shape.Dims(2); const int16_t* input_data = tflite::micro::GetTensorData(input); +#ifdef USE_TFLM_COMPRESSION + const int8_t* filter_data = tflite::micro::GetTensorData( + micro_context, filter, filter_comp_td, data.filter_scratch_index); + const int64_t* bias_data = tflite::micro::GetTensorData( + micro_context, bias, bias_comp_td, 
data.bias_scratch_index); +#else // USE_TFLM_COMPRESSION const int8_t* filter_data = tflite::micro::GetTensorData(filter); const int64_t* bias_data = tflite::micro::GetTensorData(bias); +#endif // USE_TFLM_COMPRESSION int16_t* output_data = tflite::micro::GetTensorData(output); const int num_elements = output_shape.FlatSize(); @@ -457,15 +542,24 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { data.per_channel_output_shift, data.per_channel_output_multiplier, scratch_buffer); } -#else // #if defined(HIFI3) || defined(HIFI4) || defined(HIFI5) +#else // #if defined(HIFI3) || defined(HIFI4) || defined(HIFI5) reference_integer_ops::TransposeConv( data.params, data.per_channel_output_multiplier, data.per_channel_output_shift, tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(micro_context, filter, + filter_comp_td, + data.filter_scratch_index), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData( + micro_context, bias, bias_comp_td, data.bias_scratch_index), +#else // USE_TFLM_COMPRESSION tflite::micro::GetTensorData(filter), tflite::micro::GetTensorShape(bias), - tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorData(bias), +#endif // USE_TFLM_COMPRESSION tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output), tflite::micro::GetTensorShape(nullptr), nullptr, scratch_buffer);