feat(compression): implement tensor compression feature #2995

Closed · wants to merge 1 commit
20 changes: 20 additions & 0 deletions tensorflow/lite/micro/BUILD
@@ -27,6 +27,16 @@ tflm_cc_library(
],
)

tflm_cc_library(
name = "compression",
hdrs = [
"compression.h",
],
deps = [
"//tensorflow/lite/c:common",
],
)

tflm_cc_library(
# TODO(b/187093492): Rename to micro_interpreter.
name = "micro_framework",
@@ -62,10 +72,14 @@ tflm_cc_library(
"micro_context.h",
],
deps = [
":compression",
":micro_common",
":micro_graph",
":micro_log",
":micro_profiler",
"//tensorflow/lite:type_to_tflitetype",
"//tensorflow/lite/c:common",
"//tensorflow/lite/micro/kernels:decompress",
],
)

@@ -135,6 +149,7 @@ tflm_cc_library(
":memory_helpers",
":micro_allocator",
":micro_common",
":micro_context",
":micro_graph",
":micro_log",
":micro_profiler",
@@ -162,6 +177,7 @@ tflm_cc_library(
tflm_cc_library(
name = "micro_allocator",
srcs = [
"compression.h",
"micro_allocation_info.cc",
"micro_allocator.cc",
],
@@ -170,6 +186,7 @@ tflm_cc_library(
"micro_allocator.h",
],
deps = [
":compression",
":flatbuffer_utils",
":memory_helpers",
":micro_arena_constants",
@@ -182,6 +199,7 @@ tflm_cc_library(
"//tensorflow/lite/micro/arena_allocator:non_persistent_arena_buffer_allocator",
"//tensorflow/lite/micro/arena_allocator:persistent_arena_buffer_allocator",
"//tensorflow/lite/micro/arena_allocator:simple_memory_allocator",
"//tensorflow/lite/micro/compression:metadata_saved",
"//tensorflow/lite/micro/memory_planner:greedy_memory_planner",
"//tensorflow/lite/micro/memory_planner:linear_memory_planner",
"//tensorflow/lite/micro/memory_planner:micro_memory_planner",
@@ -235,7 +253,9 @@ tflm_cc_library(
"test_helpers.h",
],
deps = [
":compression",
":memory_helpers",
":micro_log",
":micro_utils",
":op_resolvers",
"//tensorflow/lite:type_to_tflitetype",
tensorflow/lite/micro/arena_allocator/non_persistent_arena_buffer_allocator.h
@@ -74,8 +74,6 @@ class NonPersistentArenaBufferAllocator : public INonPersistentBufferAllocator {
// takes in account any temporary allocations.
size_t GetAvailableMemory(size_t alignment) const override;

-TF_LITE_REMOVE_VIRTUAL_DELETE

private:
// The memory arena that this allocator manages.
uint8_t* const buffer_head_;
@@ -97,6 +95,8 @@ class NonPersistentArenaBufferAllocator : public INonPersistentBufferAllocator {
// Count of outstanding temp buffers.
int temp_buffer_count_ = 0;
bool resizable_buffer_allocated_ = false;

+TF_LITE_REMOVE_VIRTUAL_DELETE
};

} // namespace tflite
tensorflow/lite/micro/arena_allocator/persistent_arena_buffer_allocator.h
@@ -39,7 +39,6 @@ class PersistentArenaBufferAllocator : public IPersistentBufferAllocator {
// Returns the size of all persistent allocations in bytes.
size_t GetPersistentUsedBytes() const override;

-TF_LITE_REMOVE_VIRTUAL_DELETE
private:
// The memory arena that this allocator manages.
uint8_t* const buffer_head_;
@@ -51,6 +50,8 @@ class PersistentArenaBufferAllocator : public IPersistentBufferAllocator {
// So in essence, the allocated region grows from the bottom and emulates
// SingleArenaBufferAllocator's persistent part.
uint8_t* tail_temp_;

+TF_LITE_REMOVE_VIRTUAL_DELETE
};

} // namespace tflite
tensorflow/lite/micro/arena_allocator/single_arena_buffer_allocator.h
@@ -110,8 +110,6 @@ class SingleArenaBufferAllocator : public INonPersistentBufferAllocator,
// account any temporary allocations.
size_t GetUsedBytes() const;

-TF_LITE_REMOVE_VIRTUAL_DELETE

protected:
// Returns a pointer to the current end of the head buffer.
uint8_t* head() const;
@@ -137,6 +135,8 @@ class SingleArenaBufferAllocator : public INonPersistentBufferAllocator,
intptr_t temp_buffer_ptr_check_sum_ = 0;
// Count of outstanding temp buffers.
int temp_buffer_count_ = 0;

+TF_LITE_REMOVE_VIRTUAL_DELETE
};

} // namespace tflite
68 changes: 68 additions & 0 deletions tensorflow/lite/micro/compression.h
@@ -0,0 +1,68 @@
/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_MICRO_COMPRESSION_H_
#define TENSORFLOW_LITE_MICRO_MICRO_COMPRESSION_H_

#ifdef USE_TFLM_COMPRESSION

#include "tensorflow/lite/c/common.h"

namespace tflite {

//
// Compressed tensors
//

static constexpr const char* kCompressionMetadataString =
"COMPRESSION_METADATA";

enum class CompressionScheme : uint8_t {
kBinQuant,
};

struct LookupTableData {
static constexpr size_t kMaxBitWidth = 7;
static constexpr size_t kMaxValueTableChannelStride = 128;

const void* value_table; // Pointer into FlatBuffer Values.
uint8_t value_table_channel_stride; // elements per channel
uint8_t compressed_bit_width : 3; // 1 to 7 bits
bool is_per_channel_quantized : 1; // tensor is per-channel quantized
bool use_alternate_axis : 1; // shape default channel:
// 0 = first, 1 = last
uint8_t reserved : 3;
};

union CompressionData {
LookupTableData* lut_data;
};

struct CompressionTensorData {
CompressionScheme scheme;
CompressionData data;
};

struct CompressedTensorList {
// Sparsely populated array with the same number of elements as there are
// tensors in the Subgraph. An alternative would include a tensor index in
// the struct for each and walk the list on look up. This could be slow.
const CompressionTensorData** tensors;
};

} // namespace tflite

#endif // USE_TFLM_COMPRESSION
#endif // TENSORFLOW_LITE_MICRO_MICRO_COMPRESSION_H_
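
The lookup-table (LUT) scheme above replaces each tensor element with a small index into a per-tensor (or per-channel) value table. As a rough illustration only — the PR's actual decoder is the //tensorflow/lite/micro/kernels:decompress target, and the bit order here is an assumption — a per-tensor int8 decode loop could look like:

// Hypothetical sketch, not part of this PR: expand LUT-compressed int8 data,
// assuming indices are packed MSB-first at bit_width (1..7) bits per element
// against a single value table (is_per_channel_quantized == false).
#include <cstddef>
#include <cstdint>

void DecompressLut(const uint8_t* indices, size_t count,
                   const int8_t* value_table, size_t bit_width, int8_t* out) {
  size_t bit_pos = 0;  // absolute bit offset into the packed index stream
  for (size_t i = 0; i < count; ++i) {
    uint32_t index = 0;
    for (size_t b = 0; b < bit_width; ++b, ++bit_pos) {
      // Pull the next bit off the stream, MSB of each byte first.
      index = (index << 1) |
              ((indices[bit_pos >> 3] >> (7 - (bit_pos & 7))) & 1);
    }
    out[i] = value_table[index];  // decoded value
  }
}

For a per-channel-quantized tensor, the value table would instead advance by value_table_channel_stride elements per channel, walking either the first or the last shape dimension according to use_alternate_axis.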
27 changes: 27 additions & 0 deletions tensorflow/lite/micro/compression/model_facade.py
@@ -100,10 +100,37 @@ def __init__(self, operator, index, subgraph):
def opcode(self) -> tflite.OperatorCodeT:
return self.subgraph.model.operatorCodes[self.operator.opcodeIndex]

@property
def builtin_opcode(self) -> int:
result: int = self.opcode.deprecatedBuiltinCode
if result == tflite.BuiltinOperator.PLACEHOLDER_FOR_GREATER_OP_CODES:
result = self.opcode.builtinCode
return result

@property
def inputs(self):
return _IndirectIterator(self.operator.inputs, self.subgraph.tensors)

@property
def outputs(self):
return _IndirectIterator(self.operator.outputs, self.subgraph.tensors)

@property
def inputs_indices(self):
return self.operator.inputs

@property
def outputs_indices(self):
return self.operator.outputs

@property
def builtin_options_type(self) -> int:
return self.operator.builtinOptionsType

@property
def builtin_options(self):
return self.operator.builtinOptions


_NP_DTYPES = {
tflite.TensorType.FLOAT16: np.dtype("<f2"),
2 changes: 1 addition & 1 deletion tensorflow/lite/micro/docs/compression.md
@@ -276,7 +276,7 @@ bazel run --cache_test_results=no --test_output=all -s tensorflow/lite/micro/to

The Generic Benchmark Application can be used to see the size of the model, the
amount of arena memory used, and the size of the interpreter data structures
-including those involved with tensor conpression.
+including those involved with tensor compression.

The benchmark also reports total inference time, as well as time taken for
tensor decompression. Timing data may be either wall-clock time or processor
@@ -37,7 +37,7 @@ namespace {
// Arena size is a guesstimate, followed by use of
// MicroInterpreter::arena_used_bytes() on both the AudioPreprocessor and
// MicroSpeech models and using the larger of the two results.
-constexpr size_t kArenaSize = 28584;  // xtensa p6
+constexpr size_t kArenaSize = 30 * 1024;
alignas(16) uint8_t g_arena[kArenaSize];

using Features = int8_t[kFeatureCount][kFeatureSize];
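
The new constant rounds the hand-measured 28584 bytes up to a 30 KiB bound. A minimal sketch of how such a figure is obtained with MicroInterpreter::arena_used_bytes(), assuming a model and op resolver constructed elsewhere (illustrative only, not part of the PR):

// Run one full inference, then report the arena high-water mark so that
// kArenaSize can be trimmed to the measured figure plus some headroom.
tflite::MicroInterpreter interpreter(model, op_resolver, g_arena, kArenaSize);
if (interpreter.AllocateTensors() == kTfLiteOk &&
    interpreter.Invoke() == kTfLiteOk) {
  MicroPrintf("arena used: %u of %u bytes",
              static_cast<unsigned>(interpreter.arena_used_bytes()),
              static_cast<unsigned>(kArenaSize));
}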
79 changes: 74 additions & 5 deletions tensorflow/lite/micro/fake_micro_context.cc
@@ -1,4 +1,4 @@
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -23,10 +23,23 @@ limitations under the License.

namespace tflite {

-FakeMicroContext::FakeMicroContext(TfLiteTensor* tensors,
-                                   SingleArenaBufferAllocator* allocator,
-                                   MicroGraph* micro_graph)
-    : graph_(*micro_graph), tensors_(tensors), allocator_(allocator) {}
+FakeMicroContext::FakeMicroContext(
+    TfLiteTensor* tensors, SingleArenaBufferAllocator* allocator,
+    MicroGraph* micro_graph
+#ifdef USE_TFLM_COMPRESSION
+    ,
+    const CompressedTensorList* compressed_tensors
+#endif  // USE_TFLM_COMPRESSION
+    )
+    : graph_(*micro_graph),
+      tensors_(tensors),
+      allocator_(allocator)
+#ifdef USE_TFLM_COMPRESSION
+      ,
+      compressed_tensors_(compressed_tensors)
+#endif  // USE_TFLM_COMPRESSION
+{
+}

TfLiteTensor* FakeMicroContext::AllocateTempTfLiteTensor(int tensor_index) {
allocated_temp_count_++;
@@ -112,4 +125,60 @@ void* FakeMicroContext::external_context() { return nullptr; }

MicroGraph& FakeMicroContext::graph() { return graph_; }

#ifdef USE_TFLM_COMPRESSION

// Available during Prepare & Eval. Returns false if tensor is not
// compressed.
bool FakeMicroContext::IsTensorCompressed(const TfLiteNode* node,
int tensor_idx) {
if (compressed_tensors_ != nullptr && tensor_idx < node->inputs->size) {
int index = node->inputs->data[tensor_idx];
if (index >= 0 && compressed_tensors_->tensors[index] != nullptr) {
return true;
}
}

return false;
}

// Only available during Prepare. The kernel is responsible for storing the
// scratch buffer handle.
int FakeMicroContext::AllocateDecompressionScratchBuffer(const TfLiteNode* node,
int tensor_idx) {
if (compressed_tensors_ == nullptr || tensor_idx >= node->inputs->size) {
return -1;
}
int index = node->inputs->data[tensor_idx];
if (index < 0 || compressed_tensors_->tensors[index] == nullptr) {
return -1;
}
TfLiteTensor* tensor = &tensors_[index];
int scratch_index = -1;
TfLiteStatus result =
RequestScratchBufferInArena(tensor->bytes, &scratch_index);
if (result != kTfLiteOk) {
return -1;
}

return scratch_index;
}

// Available during Prepare & Eval. Returns nullptr if tensor is not
// compressed.
const CompressionTensorData* FakeMicroContext::GetTensorCompressionData(
const TfLiteNode* node, int tensor_idx) {
if (compressed_tensors_ == nullptr || tensor_idx >= node->inputs->size) {
return nullptr;
}

int index = node->inputs->data[tensor_idx];
if (index < 0) {
return nullptr;
}

return compressed_tensors_->tensors[index];
}

#endif // USE_TFLM_COMPRESSION

} // namespace tflite
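
The three hooks above mirror the compression interface a kernel reaches through MicroContext, as the FakeMicroContext overrides suggest. A hedged sketch of the expected call pattern from a kernel's Prepare and Eval, assuming the same methods exist on MicroContext (kWeightsTensor, OpData, and the omitted decompression step are illustrative, not part of this PR):

#ifdef USE_TFLM_COMPRESSION

struct OpData {
  int weights_scratch_index;  // handle from AllocateDecompressionScratchBuffer
};

constexpr int kWeightsTensor = 1;  // illustrative input slot

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  MicroContext* micro_context = GetMicroContext(context);
  auto* data = static_cast<OpData*>(node->user_data);
  data->weights_scratch_index = -1;
  if (micro_context->IsTensorCompressed(node, kWeightsTensor)) {
    // Only legal during Prepare; the kernel stores the handle itself.
    data->weights_scratch_index =
        micro_context->AllocateDecompressionScratchBuffer(node,
                                                          kWeightsTensor);
    TF_LITE_ENSURE(context, data->weights_scratch_index != -1);
  }
  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  MicroContext* micro_context = GetMicroContext(context);
  auto* data = static_cast<OpData*>(node->user_data);
  const CompressionTensorData* compression =
      micro_context->GetTensorCompressionData(node, kWeightsTensor);
  if (compression != nullptr) {
    // Expand the weights into the scratch area reserved in Prepare(), then
    // run the usual arithmetic on the decompressed copy (expansion omitted).
    void* scratch =
        micro_context->GetScratchBuffer(data->weights_scratch_index);
    (void)scratch;
  }
  return kTfLiteOk;
}

#endif  // USE_TFLM_COMPRESSION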