diff --git a/tensorflow/lite/micro/compression.h b/tensorflow/lite/micro/compression.h index 43965c2bf48..197d5cd9634 100644 --- a/tensorflow/lite/micro/compression.h +++ b/tensorflow/lite/micro/compression.h @@ -26,7 +26,8 @@ namespace tflite { // Compressed tensors // -static constexpr const char* kCompressionMetadataString = "TFLM_COMPRESSION"; +static constexpr const char* kCompressionMetadataString = + "COMPRESSION_METADATA"; enum class CompressionScheme : uint8_t { kBinQuant, @@ -61,7 +62,7 @@ struct CompressedTensorList { // Sparsely populated array with the same number of elements as there are // tensors in the Subgraph. An alternative would include a tensor index in // the struct for each and walk the list on look up. This could be slow. - CompressionTensorData** tensors; + const CompressionTensorData** tensors; }; } // namespace tflite diff --git a/tensorflow/lite/micro/compression/BUILD b/tensorflow/lite/micro/compression/BUILD new file mode 100644 index 00000000000..cfa3e8a5077 --- /dev/null +++ b/tensorflow/lite/micro/compression/BUILD @@ -0,0 +1,42 @@ +load("@flatbuffers//:build_defs.bzl", "flatbuffer_cc_library", "flatbuffer_py_library") +load("@rules_python//python:defs.bzl", "py_test") +load("@tflm_pip_deps//:requirements.bzl", "requirement") + +package( + default_visibility = [ + "//visibility:public", + ], +) + +flatbuffer_cc_library( + name = "metadata_flatbuffer_cc", + srcs = ["metadata.fbs"], +) + +flatbuffer_py_library( + name = "metadata_flatbuffer_py", + srcs = ["metadata.fbs"], +) + +cc_test( + name = "metadata_test_cc", + srcs = ["metadata_test.cc"], + deps = [ + "metadata_flatbuffer_cc", + "//tensorflow/lite/micro:hexdump", + "@flatbuffers//:runtime_cc", + ], + size = "small", +) + +py_test( + name = "metadata_test_py", + main = "metadata_test.py", + srcs = ["metadata_test.py"], + deps = [ + "metadata_flatbuffer_py", + "@flatbuffers//:runtime_py", + requirement("hexdump"), + ], + size = "small", +) diff --git a/tensorflow/lite/micro/compression/metadata.fbs b/tensorflow/lite/micro/compression/metadata.fbs new file mode 100644 index 00000000000..dbbe7b0e405 --- /dev/null +++ b/tensorflow/lite/micro/compression/metadata.fbs @@ -0,0 +1,49 @@ +// Copyright 2024 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +namespace tflite.micro.compression; + +table Metadata { + // Compression data root, to be used in a tflite.Model.metadata field with + // the key "COMPRESSION_METADATA". + + subgraphs:[Subgraph]; // compression data indexed by subgraph index +} + +table Subgraph { + // Per-subgraph compression metadata. + + lut_tensors:[LutTensor]; + // ^ A list of tensors which are compressed using the + // (L)ook-(U)p-(T)able method. The indices of this vector are not + // significant. +} + +table LutTensor { + // Look-Up-Table Tensor: a tensor representation where elements are + // compressed into indices into a table of values. 
The indices are unsigned + // integers, index_bitwidth-wide, in big-endian bit order, packed into the + // buffer identified by the corresponding tflite.Tensor's buffer field. The + // values are located in a newly-created buffer, encoded according to the + // tflite.Tensor.type. Tensors with multiple channels have distinct values + // tables for each channel, concatenated one after another in the buffer. + // An element's LUT index must be looked up in the value table for its + // channel. + + tensor:int; // index of the corresponding tflite.Tensor + value_buffer:uint; // index of the buffer containing LUT values + index_bitwidth:uint8; // bit-width of LUT indexes +} + +root_type Metadata; diff --git a/tensorflow/lite/micro/compression/metadata_generated.h b/tensorflow/lite/micro/compression/metadata_generated.h new file mode 100644 index 00000000000..6b3af3b3e2e --- /dev/null +++ b/tensorflow/lite/micro/compression/metadata_generated.h @@ -0,0 +1,228 @@ +// automatically generated by the FlatBuffers compiler, do not modify + + +#ifndef FLATBUFFERS_GENERATED_METADATA_TFLITE_MICRO_COMPRESSION_H_ +#define FLATBUFFERS_GENERATED_METADATA_TFLITE_MICRO_COMPRESSION_H_ + +#include "flatbuffers/flatbuffers.h" + +// Ensure the included flatbuffers.h is the same version as when this file was +// generated, otherwise it may not be compatible. +static_assert(FLATBUFFERS_VERSION_MAJOR == 23 && + FLATBUFFERS_VERSION_MINOR == 5 && + FLATBUFFERS_VERSION_REVISION == 26, + "Non-compatible flatbuffers version included"); + +namespace tflite { +namespace micro { +namespace compression { + +struct Metadata; +struct MetadataBuilder; + +struct Subgraph; +struct SubgraphBuilder; + +struct LutTensor; +struct LutTensorBuilder; + +struct Metadata FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef MetadataBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_SUBGRAPHS = 4 + }; + const ::flatbuffers::Vector<::flatbuffers::Offset> *subgraphs() const { + return GetPointer> *>(VT_SUBGRAPHS); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_SUBGRAPHS) && + verifier.VerifyVector(subgraphs()) && + verifier.VerifyVectorOfTables(subgraphs()) && + verifier.EndTable(); + } +}; + +struct MetadataBuilder { + typedef Metadata Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_subgraphs(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> subgraphs) { + fbb_.AddOffset(Metadata::VT_SUBGRAPHS, subgraphs); + } + explicit MetadataBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateMetadata( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> subgraphs = 0) { + MetadataBuilder builder_(_fbb); + builder_.add_subgraphs(subgraphs); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateMetadataDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const std::vector<::flatbuffers::Offset> *subgraphs = nullptr) { + auto subgraphs__ = subgraphs ? 
_fbb.CreateVector<::flatbuffers::Offset>(*subgraphs) : 0; + return tflite::micro::compression::CreateMetadata( + _fbb, + subgraphs__); +} + +struct Subgraph FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef SubgraphBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_LUT_TENSORS = 4 + }; + const ::flatbuffers::Vector<::flatbuffers::Offset> *lut_tensors() const { + return GetPointer> *>(VT_LUT_TENSORS); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_LUT_TENSORS) && + verifier.VerifyVector(lut_tensors()) && + verifier.VerifyVectorOfTables(lut_tensors()) && + verifier.EndTable(); + } +}; + +struct SubgraphBuilder { + typedef Subgraph Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_lut_tensors(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> lut_tensors) { + fbb_.AddOffset(Subgraph::VT_LUT_TENSORS, lut_tensors); + } + explicit SubgraphBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateSubgraph( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> lut_tensors = 0) { + SubgraphBuilder builder_(_fbb); + builder_.add_lut_tensors(lut_tensors); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateSubgraphDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const std::vector<::flatbuffers::Offset> *lut_tensors = nullptr) { + auto lut_tensors__ = lut_tensors ? _fbb.CreateVector<::flatbuffers::Offset>(*lut_tensors) : 0; + return tflite::micro::compression::CreateSubgraph( + _fbb, + lut_tensors__); +} + +struct LutTensor FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef LutTensorBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_TENSOR = 4, + VT_VALUE_BUFFER = 6, + VT_INDEX_BITWIDTH = 8 + }; + int32_t tensor() const { + return GetField(VT_TENSOR, 0); + } + uint32_t value_buffer() const { + return GetField(VT_VALUE_BUFFER, 0); + } + uint8_t index_bitwidth() const { + return GetField(VT_INDEX_BITWIDTH, 0); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_TENSOR, 4) && + VerifyField(verifier, VT_VALUE_BUFFER, 4) && + VerifyField(verifier, VT_INDEX_BITWIDTH, 1) && + verifier.EndTable(); + } +}; + +struct LutTensorBuilder { + typedef LutTensor Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_tensor(int32_t tensor) { + fbb_.AddElement(LutTensor::VT_TENSOR, tensor, 0); + } + void add_value_buffer(uint32_t value_buffer) { + fbb_.AddElement(LutTensor::VT_VALUE_BUFFER, value_buffer, 0); + } + void add_index_bitwidth(uint8_t index_bitwidth) { + fbb_.AddElement(LutTensor::VT_INDEX_BITWIDTH, index_bitwidth, 0); + } + explicit LutTensorBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateLutTensor( + ::flatbuffers::FlatBufferBuilder &_fbb, + int32_t tensor = 0, + uint32_t value_buffer = 0, + uint8_t index_bitwidth = 0) { + 
  LutTensorBuilder builder_(_fbb);
+  builder_.add_value_buffer(value_buffer);
+  builder_.add_tensor(tensor);
+  builder_.add_index_bitwidth(index_bitwidth);
+  return builder_.Finish();
+}
+
+inline const tflite::micro::compression::Metadata *GetMetadata(const void *buf) {
+  return ::flatbuffers::GetRoot<tflite::micro::compression::Metadata>(buf);
+}
+
+inline const tflite::micro::compression::Metadata *GetSizePrefixedMetadata(const void *buf) {
+  return ::flatbuffers::GetSizePrefixedRoot<tflite::micro::compression::Metadata>(buf);
+}
+
+inline bool VerifyMetadataBuffer(
+    ::flatbuffers::Verifier &verifier) {
+  return verifier.VerifyBuffer<tflite::micro::compression::Metadata>(nullptr);
+}
+
+inline bool VerifySizePrefixedMetadataBuffer(
+    ::flatbuffers::Verifier &verifier) {
+  return verifier.VerifySizePrefixedBuffer<tflite::micro::compression::Metadata>(nullptr);
+}
+
+inline void FinishMetadataBuffer(
+    ::flatbuffers::FlatBufferBuilder &fbb,
+    ::flatbuffers::Offset<tflite::micro::compression::Metadata> root) {
+  fbb.Finish(root);
+}
+
+inline void FinishSizePrefixedMetadataBuffer(
+    ::flatbuffers::FlatBufferBuilder &fbb,
+    ::flatbuffers::Offset<tflite::micro::compression::Metadata> root) {
+  fbb.FinishSizePrefixed(root);
+}
+
+}  // namespace compression
+}  // namespace micro
+}  // namespace tflite
+
+#endif  // FLATBUFFERS_GENERATED_METADATA_TFLITE_MICRO_COMPRESSION_H_
diff --git a/tensorflow/lite/micro/compression/metadata_test.cc b/tensorflow/lite/micro/compression/metadata_test.cc
new file mode 100644
index 00000000000..dd0575fa683
--- /dev/null
+++ b/tensorflow/lite/micro/compression/metadata_test.cc
@@ -0,0 +1,77 @@
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+// Test validity of the flatbuffer schema and illustrate use of the flatbuffer
+// machinery with C++.
+
+#include <cassert>
+#include <iostream>
+
+#include "metadata_generated.h"
+#include "tensorflow/lite/micro/hexdump.h"
+
+using tflite::micro::compression::Metadata;
+using tflite::micro::compression::MetadataT;
+using tflite::micro::compression::Subgraph;
+using tflite::micro::compression::SubgraphT;
+using tflite::micro::compression::LutTensor;
+using tflite::micro::compression::LutTensorT;
+
+bool operator==(const LutTensorT& a, const LutTensor& b) {
+  return
+      a.tensor == b.tensor() &&
+      a.value_buffer == b.value_buffer() &&
+      a.index_bitwidth == b.index_bitwidth();
+}
+
+int main(int argc, char* argv[]) {
+  // Create these objects on the stack and copy them into the subgraph's
+  // vector later, so that we can compare these objects to what we read from
+  // the flatbuffer later.
+  LutTensorT lut_tensor0;
+  lut_tensor0.tensor = 63;
+  lut_tensor0.value_buffer = 128;
+  lut_tensor0.index_bitwidth = 2;
+
+  LutTensorT lut_tensor1;
+  lut_tensor1.tensor = 64;
+  lut_tensor1.value_buffer = 129;
+  lut_tensor1.index_bitwidth = 4;
+
+  auto subgraph0 = std::make_unique<SubgraphT>();
+  subgraph0->lut_tensors.push_back(std::make_unique<LutTensorT>(lut_tensor0));
+  subgraph0->lut_tensors.push_back(std::make_unique<LutTensorT>(lut_tensor1));
+
+  auto metadata = std::make_unique<MetadataT>();
+  metadata->subgraphs.push_back(std::move(subgraph0));
+
+  flatbuffers::FlatBufferBuilder builder;
+  auto root = Metadata::Pack(builder, metadata.get());
+  builder.Finish(root);
+  const uint8_t* buffer = builder.GetBufferPointer();
+  const size_t buffer_size = builder.GetSize();
+
+  tflite::hexdump(
+      {reinterpret_cast<const std::byte*>(buffer), buffer_size});
+  std::cout << "length: " << buffer_size << "\n";
+
+  const Metadata* read_metadata =
+      tflite::micro::compression::GetMetadata(buffer);
+  const Subgraph* read_subgraph0 = read_metadata->subgraphs()->Get(0);
+  const LutTensor* read_lut_tensor0 = read_subgraph0->lut_tensors()->Get(0);
+  const LutTensor* read_lut_tensor1 = read_subgraph0->lut_tensors()->Get(1);
+  assert(lut_tensor0 == *read_lut_tensor0);
+  assert(lut_tensor1 == *read_lut_tensor1);
+
+  return 0;
+}
diff --git a/tensorflow/lite/micro/compression/metadata_test.py b/tensorflow/lite/micro/compression/metadata_test.py
new file mode 100644
index 00000000000..be3daa09d36
--- /dev/null
+++ b/tensorflow/lite/micro/compression/metadata_test.py
@@ -0,0 +1,68 @@
+# Copyright 2024 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test validity of the flatbuffer schema and illustrate use of the flatbuffer
+# machinery with Python
+
+import sys
+import hexdump
+import flatbuffers
+
+# `.*_generated` is the name of the module created by the Bazel rule
+# `flatbuffer_py_library' based on the schema.
+from tensorflow.lite.micro.compression import metadata_flatbuffer_py_generated as schema
+
+
+def main():
+  # The classes with a `T` suffix provide an object-oriented representation of
+  # the object tree in the flatbuffer using native data structures.
+  lut_tensor0 = schema.LutTensorT()
+  lut_tensor0.tensor = 63
+  lut_tensor0.valueBuffer = 128
+  lut_tensor0.indexBitwidth = 2
+
+  lut_tensor1 = schema.LutTensorT()
+  lut_tensor1.tensor = 64
+  lut_tensor1.valueBuffer = 129
+  lut_tensor1.indexBitwidth = 4
+
+  subgraph0 = schema.SubgraphT()
+  subgraph0.lutTensors = [lut_tensor0, lut_tensor1]
+
+  metadata = schema.MetadataT()
+  metadata.subgraphs = [subgraph0]
+
+  # Build the flatbuffer itself using the flatbuffers runtime module.
+ builder = flatbuffers.Builder(32) + root = metadata.Pack(builder) + builder.Finish(root) + buffer: bytearray = builder.Output() + + print(hexdump.hexdump(buffer, result='return')) + print(f"length: {len(buffer)}") + + def attrs_equal(a, b): + return all(vars(a)[key] == vars(b)[key] for key in vars(a)) + + read_metadata = schema.MetadataT.InitFromPackedBuf(buffer, 0) + read_subgraph0 = read_metadata.subgraphs[0] + + assert attrs_equal(read_subgraph0.lutTensors[0], lut_tensor0) + assert attrs_equal(read_subgraph0.lutTensors[1], lut_tensor1) + + sys.exit() + + +if __name__ == "__main__": + main() diff --git a/tensorflow/lite/micro/fake_micro_context.cc b/tensorflow/lite/micro/fake_micro_context.cc index 5787ffd0648..8874798896c 100644 --- a/tensorflow/lite/micro/fake_micro_context.cc +++ b/tensorflow/lite/micro/fake_micro_context.cc @@ -1,4 +1,4 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -23,10 +23,23 @@ limitations under the License. namespace tflite { -FakeMicroContext::FakeMicroContext(TfLiteTensor* tensors, - SingleArenaBufferAllocator* allocator, - MicroGraph* micro_graph) - : graph_(*micro_graph), tensors_(tensors), allocator_(allocator) {} +FakeMicroContext::FakeMicroContext( + TfLiteTensor* tensors, SingleArenaBufferAllocator* allocator, + MicroGraph* micro_graph +#ifdef USE_TFLM_COMPRESSION + , + const CompressedTensorList* compressed_tensors +#endif // USE_TFLM_COMPRESSION + ) + : graph_(*micro_graph), + tensors_(tensors), + allocator_(allocator) +#ifdef USE_TFLM_COMPRESSION + , + compressed_tensors_(compressed_tensors) +#endif // USE_TFLM_COMPRESSION +{ +} TfLiteTensor* FakeMicroContext::AllocateTempTfLiteTensor(int tensor_index) { allocated_temp_count_++; @@ -112,4 +125,60 @@ void* FakeMicroContext::external_context() { return nullptr; } MicroGraph& FakeMicroContext::graph() { return graph_; } +#ifdef USE_TFLM_COMPRESSION + +// Available during Prepare & Eval. Returns false if tensor is not +// compressed. +bool FakeMicroContext::IsTensorCompressed(const TfLiteNode* node, + int tensor_idx) { + if (compressed_tensors_ != nullptr && tensor_idx < node->inputs->size) { + int index = node->inputs->data[tensor_idx]; + if (index >= 0 && compressed_tensors_->tensors[index] != nullptr) { + return true; + } + } + + return false; +} + +// Only available during Prepare. The kernel is responsible for storing the +// scratch buffer handle. +int FakeMicroContext::AllocateDecompressionScratchBuffer(const TfLiteNode* node, + int tensor_idx) { + if (compressed_tensors_ == nullptr || tensor_idx >= node->inputs->size) { + return -1; + } + int index = node->inputs->data[tensor_idx]; + if (index < 0 || compressed_tensors_->tensors[index] == nullptr) { + return -1; + } + TfLiteTensor* tensor = &tensors_[index]; + int scratch_index = -1; + TfLiteStatus result = + RequestScratchBufferInArena(tensor->bytes, &scratch_index); + if (result != kTfLiteOk) { + return -1; + } + + return scratch_index; +} + +// Available during Prepare & Eval. Returns nullptr if tensor is not +// compressed. 
+const CompressionTensorData* FakeMicroContext::GetTensorCompressionData( + const TfLiteNode* node, int tensor_idx) { + if (compressed_tensors_ == nullptr || tensor_idx >= node->inputs->size) { + return nullptr; + } + + int index = node->inputs->data[tensor_idx]; + if (index < 0) { + return nullptr; + } + + return compressed_tensors_->tensors[index]; +} + +#endif // USE_TFLM_COMPRESSION + } // namespace tflite diff --git a/tensorflow/lite/micro/kernels/conv.cc b/tensorflow/lite/micro/kernels/conv.cc index 0df35fce4eb..221d560afa6 100644 --- a/tensorflow/lite/micro/kernels/conv.cc +++ b/tensorflow/lite/micro/kernels/conv.cc @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -45,15 +45,35 @@ TfLiteStatus ConvEval(TfLiteContext* context, TfLiteNode* node) { TFLITE_DCHECK(node->user_data != nullptr); const auto& data = *(static_cast(node->user_data)); +#ifdef USE_TFLM_COMPRESSION + + MicroContext* micro_context = GetMicroContext(context); + + const CompressionTensorData* weights_comp_td = + micro_context->GetTensorCompressionData(node, kConvWeightsTensor); + const CompressionTensorData* bias_comp_td = + micro_context->GetTensorCompressionData(node, kConvBiasTensor); + +#endif // USE_TFLM_COMPRESSION + switch (input->type) { // Already know in/out types are same. case kTfLiteFloat32: { tflite::reference_ops::Conv( ConvParamsFloat(params, data), tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(micro_context, filter, + weights_comp_td, + data.weights_scratch_index), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(micro_context, bias, bias_comp_td, + data.bias_scratch_index), +#else // USE_TFLM_COMPRESSION tflite::micro::GetTensorData(filter), tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), +#endif // USE_TFLM_COMPRESSION tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output), tflite::micro::GetTensorShape(nullptr), nullptr); @@ -67,9 +87,18 @@ TfLiteStatus ConvEval(TfLiteContext* context, TfLiteNode* node) { tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(micro_context, filter, + weights_comp_td, + data.weights_scratch_index), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData( + micro_context, bias, bias_comp_td, data.bias_scratch_index), +#else // USE_TFLM_COMPRESSION tflite::micro::GetTensorData(filter), tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), +#endif // USE_TFLM_COMPRESSION tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output)); } else if (bias->type == kTfLiteInt64) { @@ -79,9 +108,18 @@ TfLiteStatus ConvEval(TfLiteContext* context, TfLiteNode* node) { tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(micro_context, filter, + weights_comp_td, + data.weights_scratch_index), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData( + micro_context, bias, bias_comp_td, data.bias_scratch_index), +#else // USE_TFLM_COMPRESSION 
tflite::micro::GetTensorData(filter), tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), +#endif // USE_TFLM_COMPRESSION tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output)); } else { @@ -119,9 +157,19 @@ TfLiteStatus ConvEval(TfLiteContext* context, TfLiteNode* node) { tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(micro_context, filter, + weights_comp_td, + data.weights_scratch_index), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData( + micro_context, bias, bias_comp_td, data.bias_scratch_index), +#else // USE_TFLM_COMPRESSION tflite::micro::GetTensorData(filter), tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), +#endif // USE_TFLM_COMPRESSION + tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output)); break; diff --git a/tensorflow/lite/micro/kernels/conv.h b/tensorflow/lite/micro/kernels/conv.h index 0c8073f48f0..0090053e03c 100644 --- a/tensorflow/lite/micro/kernels/conv.h +++ b/tensorflow/lite/micro/kernels/conv.h @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -49,6 +49,14 @@ struct OpDataConv { // A buffer used to store unpacked filter values. This is used if the source // tensor is of n-bit precision that cannot be easily processed by kernels. int filter_buffer_index; + +#ifdef USE_TFLM_COMPRESSION + + // scratch buffers for compressed tensors + int weights_scratch_index; + int bias_scratch_index; + +#endif // USE_TFLM_COMPRESSION }; extern const int kConvInputTensor; diff --git a/tensorflow/lite/micro/kernels/conv_common.cc b/tensorflow/lite/micro/kernels/conv_common.cc index 51c7a6ff2d6..9f0f2f79588 100644 --- a/tensorflow/lite/micro/kernels/conv_common.cc +++ b/tensorflow/lite/micro/kernels/conv_common.cc @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -209,6 +209,23 @@ TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node) { &data->filter_buffer_index); } +#ifdef USE_TFLM_COMPRESSION + + // Compression scratch buffers. + // These will only be allocated if the tensor is compressed. + if (micro_context->IsTensorCompressed(node, kConvWeightsTensor) && + filter->type == kTfLiteInt4) { + MicroPrintf("Compression not supported with INT4 tensors"); + return kTfLiteError; + } + data->weights_scratch_index = + micro_context->AllocateDecompressionScratchBuffer(node, + kConvWeightsTensor); + data->bias_scratch_index = + micro_context->AllocateDecompressionScratchBuffer(node, kConvBiasTensor); + +#endif // USE_TFLM_COMPRESSION + micro_context->DeallocateTempTfLiteTensor(filter); micro_context->DeallocateTempTfLiteTensor(input); micro_context->DeallocateTempTfLiteTensor(output); diff --git a/tensorflow/lite/micro/kernels/conv_test.cc b/tensorflow/lite/micro/kernels/conv_test.cc index 0fb9411a3f0..0c3e0f06937 100644 --- a/tensorflow/lite/micro/kernels/conv_test.cc +++ b/tensorflow/lite/micro/kernels/conv_test.cc @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. 
All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,6 +15,8 @@ limitations under the License. #include "tensorflow/lite/micro/kernels/conv_test.h" +#include + #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/micro/kernels/kernel_runner.h" @@ -46,6 +48,83 @@ static int kOutputShape[] = {4, 2, 1, 2, 3}; static const float kGoldenData[kOutputElements] = {18, 2, 5, 18, 2, 5, 17, 4, 3, 37, 4, 3}; +#ifdef USE_TFLM_COMPRESSION + +// compressed filter data for kBinQuant scheme, matches kFilterData +constexpr uint8_t kBinQuantFilterData[] = { + 0x05, 0x38, 0x20, 0x90, 0x00, +}; +constexpr float kBinQuantFilterValueTable[] = { + 1, 2, 3, 4, -1, +}; +constexpr int kBinQuantFilterBitWidth = 3; +// compressed bias data for kBinQuant scheme, matches kBiasData +constexpr uint8_t kBinQuantBiasData[] = {0x18}; +constexpr int kBinQuantBiasBitWidth = 2; + +// Common inputs and outputs for quantized compressed tensor tests. +// Values from TfLite conv_test.cc SimplePerChannelTest. +static int kInputShapeQ1[] = {4, 1, 2, 3, 2}; +static const float kInputDataQ1[] = { + // [1 * 2 * 3 * 2] as [batch, y, x, input_channel] + 3, 2, // batch = 0, y = 0, x = 0 + 1, -1, // batch = 0, y = 0, x = 1 + -2, -3, // batch = 0, y = 0, x = 2 + 4, 3, // batch = 0, y = 1, x = 0 + 2, -2, // batch = 0, y = 1, x = 1 + -3, -4, // batch = 0, y = 1, x = 2 +}; +constexpr size_t kInputElementsQ1 = std::extent::value; + +constexpr int kFilterNumChannelsQ1 = 2; +static int kFilterShapeQ1[] = {4, 2, 2, 2, 2}; +static const float kFilterDataQ1[] = { + // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel] + 1, 2, // out channel = 0, y = 0, x = 0 + 3, 4, // out channel = 0, y = 0, x = 1 + 3, 4, // out channel = 0, y = 1, x = 0 + 5, 6, // out channel = 0, y = 1, x = 1 + 7, 8, // out channel = 1, y = 0, x = 0 + 5, 6, // out channel = 1, y = 0, x = 1 + 3, 4, // out channel = 1, y = 1, x = 0 + 1, 2, // out channel = 1, y = 1, x = 1 +}; +constexpr size_t kFilterElementsQ1 = + std::extent::value; + +static int kBiasShapeQ1[] = {1, 2}; +static const float kBiasDataQ1[] = {3, -2}; +constexpr size_t kBiasElementsQ1 = std::extent::value; + +static int kOutputShapeQ1[] = {4, 1, 1, 2, 2}; +static const float kGoldenDataQ1[] = {31, 64, -57, -46}; +constexpr int kOutputElementsQ1 = std::extent::value; +static const float kGoldenDataQ1_16[] = {31, 63.99804688, -57, -46}; + +// compressed filter data for kBinQuant scheme, matches kFilterDataQ1 +constexpr uint8_t kBinQuantFilterDataQ1[] = { + 0x05, 0x34, 0xE5, 0xDE, 0x54, 0xC1, +}; +constexpr float kBinQuantFilterValueTableQ1[] = { + 1, 2, 3, 4, 5, 6, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, +}; +constexpr int kBinQuantFilterBitWidthQ1 = 3; +// compressed bias data for kBinQuant scheme, matches kBiasDataQ1 +constexpr uint8_t kBinQuantBiasDataQ1[] = {0x00}; +constexpr int kBinQuantBiasBitWidthQ1 = 1; + +static TfLiteConvParams common_conv_params_q1 = { + kTfLitePaddingValid, // padding + 1, // stride_width + 1, // stride_height + kTfLiteActNone, // activation + 1, // dilation_width_factor + 1, // dilation_height_factor + kTfLiteNoType // quantized_bias_type +}; + +#endif // USE_TFLM_COMPRESSION + static TfLiteConvParams common_conv_params = { kTfLitePaddingValid, // padding 2, // stride_width @@ -122,6 +201,66 @@ TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannel) { output_data)); } +#ifdef 
USE_TFLM_COMPRESSION + +TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannelCompressed) { + const float input_scale = 0.5f; + const float output_scale = 0.5f; + const int input_zero_point = -1; + const int output_zero_point = -1; + constexpr float filter_scales[] = {tflite::testing::kFilterNumChannelsQ1, + 1.0f, 2.0f}; + constexpr int filter_zero_points[] = {tflite::testing::kFilterNumChannelsQ1, + 0, 0}; + // bias scales and zero points will be computed + float bias_scales[std::extent::value] = {}; + int bias_zero_points[std::extent::value] = {}; + + int8_t input_quantized[tflite::testing::kInputElementsQ1]; + int8_t filter_quantized[tflite::testing::kFilterElementsQ1]; + int32_t bias_quantized[tflite::testing::kBiasElementsQ1]; + int8_t golden_quantized[tflite::testing::kOutputElementsQ1]; + int8_t output_quantized[tflite::testing::kOutputElementsQ1]; + + tflite::testing::TestCompressionQuantizedInfo comp_info = {}; + comp_info.scheme = tflite::CompressionScheme::kBinQuant; + + comp_info.filter_value_table = filter_quantized; + comp_info.filter_value_table_stride = + std::extent< + decltype(tflite::testing::kBinQuantFilterValueTableQ1)>::value / + tflite::testing::kFilterNumChannelsQ1; + comp_info.filter_bit_width = tflite::testing::kBinQuantFilterBitWidthQ1; + comp_info.filter_compressed = tflite::testing::kBinQuantFilterDataQ1; + comp_info.filter_data = tflite::testing::kBinQuantFilterValueTableQ1; + comp_info.filter_dims_data = tflite::testing::kFilterShapeQ1; + comp_info.filter_scales = filter_scales; + comp_info.filter_zero_points = filter_zero_points; + + comp_info.bias_value_table = bias_quantized; + comp_info.bias_value_table_stride = + std::extent::value / + tflite::testing::kFilterNumChannelsQ1; + comp_info.bias_bit_width = tflite::testing::kBinQuantBiasBitWidthQ1; + comp_info.bias_compressed = tflite::testing::kBinQuantBiasDataQ1; + comp_info.bias_data = tflite::testing::kBiasDataQ1; + comp_info.bias_dims_data = tflite::testing::kBiasShapeQ1; + comp_info.bias_scales = bias_scales; + comp_info.bias_zero_points = bias_zero_points; + + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, + tflite::testing::TestConvQuantizedPerChannelCompressed( + tflite::testing::kInputShapeQ1, tflite::testing::kInputDataQ1, + input_quantized, input_scale, input_zero_point, + tflite::testing::kOutputShapeQ1, tflite::testing::kGoldenDataQ1, + golden_quantized, output_quantized, output_scale, output_zero_point, + &tflite::testing::common_conv_params_q1, tflite::Register_CONV_2D(), + &comp_info)); +} + +#endif // USE_TFLM_COMPRESSION + TF_LITE_MICRO_TEST(SimpleTestFloat) { float output_data[tflite::testing::kOutputElements]; @@ -136,6 +275,37 @@ TF_LITE_MICRO_TEST(SimpleTestFloat) { output_data)); } +#ifdef USE_TFLM_COMPRESSION + +TF_LITE_MICRO_TEST(SimpleTestFloatCompressed) { + tflite::testing::TestCompressionInfo comp_info = {}; + comp_info.scheme = tflite::CompressionScheme::kBinQuant; + comp_info.filter_value_table = tflite::testing::kBinQuantFilterValueTable; + comp_info.filter_value_table_stride = + std::extent::value; + comp_info.filter_bit_width = tflite::testing::kBinQuantFilterBitWidth; + comp_info.bias_value_table = tflite::testing::kBiasData; + comp_info.bias_value_table_stride = + std::extent::value; + comp_info.bias_bit_width = tflite::testing::kBinQuantBiasBitWidth; + + float output_data[tflite::testing::kOutputElements]; + + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, + tflite::testing::TestConvFloat( + tflite::testing::kInputShape, tflite::testing::kInputData, + tflite::testing::kFilterShape, + 
reinterpret_cast(tflite::testing::kBinQuantFilterData), + tflite::testing::kBiasShape, + reinterpret_cast(tflite::testing::kBinQuantBiasData), + tflite::testing::kOutputShape, tflite::testing::kGoldenData, + &tflite::testing::common_conv_params, tflite::Register_CONV_2D(), + output_data, &comp_info)); +} + +#endif + TF_LITE_MICRO_TEST(InputAndFilterSameWidthHeight) { const int output_dims_count = 2; float output_data[output_dims_count]; @@ -246,6 +416,66 @@ TF_LITE_MICRO_TEST(SimpleTestQuantized16x8PerChannel64bBias) { output_data)); } +#ifdef USE_TFLM_COMPRESSION + +TF_LITE_MICRO_TEST(SimpleTestQuantized16x8PerChannel64bBiasCompressed) { + const float input_scale = 128.0f / 65536; + const float output_scale = 128.0f / 65536; + const int input_zero_point = 0; + const int output_zero_point = 0; + constexpr float filter_scales[] = {tflite::testing::kFilterNumChannelsQ1, + 1.0f, 2.0f}; + constexpr int filter_zero_points[] = {tflite::testing::kFilterNumChannelsQ1, + 0, 0}; + // bias scales and zero points will be computed + float bias_scales[std::extent::value] = {}; + int bias_zero_points[std::extent::value] = {}; + + int16_t input_quantized[tflite::testing::kInputElementsQ1]; + int8_t filter_quantized[tflite::testing::kFilterElementsQ1]; + int64_t bias_quantized[tflite::testing::kBiasElementsQ1]; + int16_t golden_quantized[tflite::testing::kOutputElementsQ1]; + int16_t output_quantized[tflite::testing::kOutputElementsQ1]; + + tflite::testing::TestCompressionQuantizedInfo comp_info = {}; + comp_info.scheme = tflite::CompressionScheme::kBinQuant; + + comp_info.filter_value_table = filter_quantized; + comp_info.filter_value_table_stride = + std::extent< + decltype(tflite::testing::kBinQuantFilterValueTableQ1)>::value / + tflite::testing::kFilterNumChannelsQ1; + comp_info.filter_bit_width = tflite::testing::kBinQuantFilterBitWidthQ1; + comp_info.filter_compressed = tflite::testing::kBinQuantFilterDataQ1; + comp_info.filter_data = tflite::testing::kBinQuantFilterValueTableQ1; + comp_info.filter_dims_data = tflite::testing::kFilterShapeQ1; + comp_info.filter_scales = filter_scales; + comp_info.filter_zero_points = filter_zero_points; + + comp_info.bias_value_table = bias_quantized; + comp_info.bias_value_table_stride = + std::extent::value / + tflite::testing::kFilterNumChannelsQ1; + comp_info.bias_bit_width = tflite::testing::kBinQuantBiasBitWidthQ1; + comp_info.bias_compressed = tflite::testing::kBinQuantBiasDataQ1; + comp_info.bias_data = tflite::testing::kBiasDataQ1; + comp_info.bias_dims_data = tflite::testing::kBiasShapeQ1; + comp_info.bias_scales = bias_scales; + comp_info.bias_zero_points = bias_zero_points; + + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, + tflite::testing::TestConvQuantizedPerChannelCompressed( + tflite::testing::kInputShapeQ1, tflite::testing::kInputDataQ1, + input_quantized, input_scale, input_zero_point, + tflite::testing::kOutputShapeQ1, tflite::testing::kGoldenDataQ1_16, + golden_quantized, output_quantized, output_scale, output_zero_point, + &tflite::testing::common_conv_params_q1, tflite::Register_CONV_2D(), + &comp_info)); +} + +#endif // USE_TFLM_COMPRESSION + TF_LITE_MICRO_TEST(SimpleTestQuantized16x8PerChannel32bBias) { const int output_dims_count = 12; int16_t output_data[output_dims_count]; @@ -276,6 +506,66 @@ TF_LITE_MICRO_TEST(SimpleTestQuantized16x8PerChannel32bBias) { output_data)); } +#ifdef USE_TFLM_COMPRESSION + +TF_LITE_MICRO_TEST(SimpleTestQuantized16x8PerChannel32bBiasCompressed) { + const float input_scale = 128.0f / 65536; + const float output_scale = 
128.0f / 65536; + const int input_zero_point = 0; + const int output_zero_point = 0; + constexpr float filter_scales[] = {tflite::testing::kFilterNumChannelsQ1, + 1.0f, 2.0f}; + constexpr int filter_zero_points[] = {tflite::testing::kFilterNumChannelsQ1, + 0, 0}; + // bias scales and zero points will be computed + float bias_scales[std::extent::value] = {}; + int bias_zero_points[std::extent::value] = {}; + + int16_t input_quantized[tflite::testing::kInputElementsQ1]; + int8_t filter_quantized[tflite::testing::kFilterElementsQ1]; + int32_t bias_quantized[tflite::testing::kBiasElementsQ1]; + int16_t golden_quantized[tflite::testing::kOutputElementsQ1]; + int16_t output_quantized[tflite::testing::kOutputElementsQ1]; + + tflite::testing::TestCompressionQuantizedInfo comp_info = {}; + comp_info.scheme = tflite::CompressionScheme::kBinQuant; + + comp_info.filter_value_table = filter_quantized; + comp_info.filter_value_table_stride = + std::extent< + decltype(tflite::testing::kBinQuantFilterValueTableQ1)>::value / + tflite::testing::kFilterNumChannelsQ1; + comp_info.filter_bit_width = tflite::testing::kBinQuantFilterBitWidthQ1; + comp_info.filter_compressed = tflite::testing::kBinQuantFilterDataQ1; + comp_info.filter_data = tflite::testing::kBinQuantFilterValueTableQ1; + comp_info.filter_dims_data = tflite::testing::kFilterShapeQ1; + comp_info.filter_scales = filter_scales; + comp_info.filter_zero_points = filter_zero_points; + + comp_info.bias_value_table = bias_quantized; + comp_info.bias_value_table_stride = + std::extent::value / + tflite::testing::kFilterNumChannelsQ1; + comp_info.bias_bit_width = tflite::testing::kBinQuantBiasBitWidthQ1; + comp_info.bias_compressed = tflite::testing::kBinQuantBiasDataQ1; + comp_info.bias_data = tflite::testing::kBiasDataQ1; + comp_info.bias_dims_data = tflite::testing::kBiasShapeQ1; + comp_info.bias_scales = bias_scales; + comp_info.bias_zero_points = bias_zero_points; + + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, + tflite::testing::TestConvQuantizedPerChannelCompressed( + tflite::testing::kInputShapeQ1, tflite::testing::kInputDataQ1, + input_quantized, input_scale, input_zero_point, + tflite::testing::kOutputShapeQ1, tflite::testing::kGoldenDataQ1_16, + golden_quantized, output_quantized, output_scale, output_zero_point, + &tflite::testing::common_conv_params_q1, tflite::Register_CONV_2D(), + &comp_info)); +} + +#endif // USE_TFLM_COMPRESSION + TF_LITE_MICRO_TEST(SimpleTestDilatedQuantizedPerChannel) { const int output_dims_count = 24; int8_t output_data[output_dims_count]; @@ -1190,3 +1480,60 @@ TF_LITE_MICRO_TEST(Int8Filter8x3x3x3PerChannelScaleRelu6ShouldMatchGolden) { } TF_LITE_MICRO_TESTS_END + +// {TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1}, +// {TensorType_INT8, +// // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel] +// {2, 2, 2, 2}, +// 0, +// 0, +// 0, +// 0, +// /*per_channel_quantization=*/true, +// /*per_channel_quantization_scales=*/{1, 2}, +// /*per_channel_quantization_offsets=*/{0, 0}, +// /*channel_index=*/0}, +// {TensorType_INT8, {}, -63.5, 64, 0.5, -1}, +// /*stride_width=*/1, /*stride_height=*/1); +// m.SetInput({ +// // [1 * 2 * 3 * 2] as [batch, y, x, input_channel] +// 3, 2, // batch = 0, y = 0, x = 0 +// 1, -1, // batch = 0, y = 0, x = 1 +// -2, -3, // batch = 0, y = 0, x = 2 +// 4, 3, // batch = 0, y = 1, x = 0 +// 2, -2, // batch = 0, y = 1, x = 1 +// -3, -4, // batch = 0, y = 1, x = 2 +// }); +// m.SetFilter( +// // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel] +// { +// 1, 2, // out channel = 0, y = 0, 
x = 0 +// 3, 4, // out channel = 0, y = 0, x = 1 +// 3, 4, // out channel = 0, y = 1, x = 0 +// 5, 6, // out channel = 0, y = 1, x = 1 +// 7, 8, // out channel = 1, y = 0, x = 0 +// 5, 6, // out channel = 1, y = 0, x = 1 +// 3, 4, // out channel = 1, y = 1, x = 0 +// 1, 2, // out channel = 1, y = 1, x = 1 +// }); +// m.SetBias({3, -2}); +// // output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel] +// EXPECT_THAT(m.GetDequantizedOutput(), +// ElementsAreArray(ArrayFloatNear({31, 64, -57, -46}))); +// EXPECT_THAT(m.GetOutput(), ElementsAreArray({61, 127, -115, -93})); + +// TEST_P(ConvolutionOpTest, SimplePerChannel16x8Bias32) { +// const float scale = 128.0 / 65536; +// // output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel] +// EXPECT_THAT(m.GetDequantizedOutput(), +// ElementsAreArray(ArrayFloatNear({31, 63.99804688, -57, -46}))); +// EXPECT_THAT(m.GetOutput(), +// ElementsAreArray({15872, 32767, -29184, -23552})); + +// TEST_P(ConvolutionOpTest, SimplePerChannel16x8Bias64) { +// const float scale = 128.0 / 65536; +// // output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel] +// EXPECT_THAT(m.GetDequantizedOutput(), +// ElementsAreArray(ArrayFloatNear({31, 63.99804688, -57, -46}))); +// EXPECT_THAT(m.GetOutput(), +// ElementsAreArray({15872, 32767, -29184, -23552})); \ No newline at end of file diff --git a/tensorflow/lite/micro/kernels/conv_test.h b/tensorflow/lite/micro/kernels/conv_test.h index c655f043bcc..7f6c55e2a9e 100644 --- a/tensorflow/lite/micro/kernels/conv_test.h +++ b/tensorflow/lite/micro/kernels/conv_test.h @@ -1,4 +1,4 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ limitations under the License. #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/micro/kernels/conv.h" #include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/kernels/micro_ops.h" #include "tensorflow/lite/micro/test_helpers.h" @@ -26,35 +27,123 @@ limitations under the License. 
namespace tflite { namespace testing { -TfLiteStatus InvokeConv(TfLiteTensor* tensors, int tensors_size, - int output_length, TfLiteConvParams* conv_params, - TFLMRegistration registration, float* output_data); +constexpr int kConvMaxTensors = 4; +constexpr int kConvMaxInputTensors = 3; +template TfLiteStatus InvokeConv(TfLiteTensor* tensors, int tensors_size, - int output_length, TfLiteConvParams* conv_params, - TFLMRegistration registration, int8_t* output_data); - -TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size, - const float* expected_output_data, - int output_length, - TfLiteConvParams* conv_params, - TFLMRegistration registration, - float* output_data, float tolerance = 1e-5); - -TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size, - const int8_t* expected_output_data, - int output_length, - TfLiteConvParams* conv_params, - TFLMRegistration registration, - int8_t* output_data, float tolerance = 1e-5); - -TfLiteStatus TestConvFloat(int* input_dims_data, const float* input_data, - int* filter_dims_data, const float* filter_data, - int* bias_dims_data, const float* bias_data, - int* output_dims_data, - const float* expected_output_data, - TfLiteConvParams* conv_params, - TFLMRegistration registration, float* output_data); + int output_length, const TfLiteConvParams* conv_params, + TFLMRegistration registration, T* output_data +#ifdef USE_TFLM_COMPRESSION + , + const CompressedTensorList* comp_list_p = nullptr +#endif // USE_TFLM_COMPRESSION +) { + // TODO(b/358165875): support optional bias tensor + int inputs_array_data[] = {3, 0, 1, 2}; + TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); + int outputs_array_data[] = {1, 3}; + TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); + + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, conv_params +#ifdef USE_TFLM_COMPRESSION + , + nullptr, comp_list_p +#endif // USE_TFLM_COMPRESSION + ); + + const char* init_data = reinterpret_cast(conv_params); + TfLiteStatus status = runner.InitAndPrepare(init_data); + if (status != kTfLiteOk) { + return status; + } + return runner.Invoke(); +} + +template +TfLiteStatus ValidateConvGoldens( + TfLiteTensor* tensors, int tensors_size, const T* expected_output_data, + int output_length, const TfLiteConvParams* conv_params, + TFLMRegistration registration, T* output_data, float tolerance = 1e-5 +#ifdef USE_TFLM_COMPRESSION + , + const TestCompressionInfo* comp_info = nullptr +#endif // USE_TFLM_COMPRESSION +) { +#ifdef USE_TFLM_COMPRESSION + + TestCompressedList tcl; + const CompressedTensorList* comp_list_p = nullptr; + + if (comp_info != nullptr) { + TF_LITE_MICRO_EXPECT_EQ( + tcl.AddWeight(*comp_info, tensors[kConvWeightsTensor], + kConvWeightsTensor), + kTfLiteOk); + TF_LITE_MICRO_CHECK_FAIL(); + TF_LITE_MICRO_EXPECT_EQ( + tcl.AddBias(*comp_info, tensors[kConvBiasTensor], kConvBiasTensor), + kTfLiteOk); + TF_LITE_MICRO_CHECK_FAIL(); + comp_list_p = tcl.GetCompressedTensorList(); + } + +#endif // USE_TFLM_COMPRESSION + + TfLiteStatus status = InvokeConv(tensors, tensors_size, output_length, + conv_params, registration, output_data +#ifdef USE_TFLM_COMPRESSION + , + comp_list_p +#endif // USE_TFLM_COMPRESSION + ); + if (status != kTfLiteOk) { + return status; + } + for (int i = 0; i < output_length; ++i) { + TF_LITE_MICRO_EXPECT_NEAR(expected_output_data[i], output_data[i], + tolerance); + } + return kTfLiteOk; +} + +template +TfLiteStatus TestConvFloat( + int* input_dims_data, 
const float* input_data, int* filter_dims_data, + const float* filter_data, int* bias_dims_data, const float* bias_data, + int* output_dims_data, const float* expected_output_data, + TfLiteConvParams* conv_params, TFLMRegistration registration, + float* output_data +#ifdef USE_TFLM_COMPRESSION + , + const TestCompressionInfo* comp_info = nullptr +#endif // USE_TFLM_COMPRESSION +) { + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data); + TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); + const int output_dims_count = ElementCount(*output_dims); + constexpr int inputs_size = 3; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + CreateTensor(input_data, input_dims), + CreateTensor(filter_data, filter_dims), + CreateTensor(bias_data, bias_dims), + CreateTensor(output_data, output_dims), + }; + + return ValidateConvGoldens(tensors, tensors_size, expected_output_data, + output_dims_count, conv_params, registration, + output_data +#ifdef USE_TFLM_COMPRESSION + , + 1e-5f, comp_info +#endif // USE_TFLM_COMPRESSION + ); +} TfLiteStatus TestConvQuantizedPerChannel( int* input_dims_data, const float* input_data, int8_t* input_quantized, @@ -88,6 +177,71 @@ TfLiteStatus TestConvQuantizedPerChannel( float output_scale, int output_zero_point, TfLiteConvParams* conv_params, TFLMRegistration registration, int16_t* output_data); +#ifdef USE_TFLM_COMPRESSION + +template +TfLiteStatus TestConvQuantizedPerChannelCompressed( + int* input_dims_data, const float* input_data, TIO* input_quantized, + float input_scale, int input_zero_point, int* output_dims_data, + const float* expected_output_data, TIO* expected_output_quantized, + TIO* output_quantized, float output_scale, int output_zero_point, + const TfLiteConvParams* conv_params, TFLMRegistration registration, + const TestCompressionQuantizedInfo* comp_info) { + // TODO(b/358165875): account for optional bias tensor + // bool null_bias = comp_info->bias_data == nullptr ? 
true : false; + + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* filter_dims = IntArrayFromInts(comp_info->filter_dims_data); + TfLiteIntArray* bias_dims = IntArrayFromInts(comp_info->bias_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); + + TfLiteFloatArray* filter_scales = + FloatArrayFromFloats(comp_info->filter_scales); + TfLiteIntArray* filter_zero_points = + IntArrayFromInts(comp_info->filter_zero_points); + TfLiteFloatArray* bias_scales = FloatArrayFromFloats(comp_info->bias_scales); + TfLiteIntArray* bias_zero_points = + IntArrayFromInts(comp_info->bias_zero_points); + + TfLiteAffineQuantization filter_quant = {}; + TfLiteTensor filter_tensor = CreatePerChannelQuantizedTensor( + comp_info->filter_compressed, filter_dims, filter_scales, + filter_zero_points, &filter_quant, kConvQuantizedDimension, + false /* is_variable */, kTfLiteInt8); + SymmetricPerChannelQuantize( + comp_info->filter_data, comp_info->filter_value_table, + ElementCount(*filter_dims), filter_scales->size, filter_scales->data); + + TfLiteAffineQuantization bias_quant = {}; + TfLiteTensor bias_tensor = CreatePerChannelQuantizedBiasTensor( + comp_info->bias_compressed, bias_dims, input_scale, filter_scales, + bias_scales, bias_zero_points, &bias_quant, kConvQuantizedDimension, + false /* is_variable */, typeToTfLiteType()); + SymmetricPerChannelQuantize(comp_info->bias_data, comp_info->bias_value_table, + ElementCount(*bias_dims), bias_scales->size, + bias_scales->data); + + constexpr int tensors_size = kConvMaxTensors; + TfLiteTensor tensors[tensors_size] = { + CreateQuantizedTensor(input_data, input_quantized, input_dims, + input_scale, input_zero_point), + filter_tensor, + bias_tensor, + CreateQuantizedTensor(output_quantized, output_dims, output_scale, + output_zero_point), + }; + + const int output_dims_count = ElementCount(*output_dims); + Quantize(expected_output_data, expected_output_quantized, output_dims_count, + output_scale, output_zero_point); + return ValidateConvGoldens(tensors, tensors_size, expected_output_quantized, + output_dims_count, conv_params, registration, + output_quantized, 1.0e-5f /* tolerance */, + comp_info); +} + +#endif // USE_TFLM_COMPRESSION + } // namespace testing } // namespace tflite diff --git a/tensorflow/lite/micro/kernels/conv_test_common.cc b/tensorflow/lite/micro/kernels/conv_test_common.cc index a0f733b8e42..7b6f71a8fc3 100644 --- a/tensorflow/lite/micro/kernels/conv_test_common.cc +++ b/tensorflow/lite/micro/kernels/conv_test_common.cc @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -18,108 +18,6 @@ limitations under the License. 
namespace tflite { namespace testing { -template -TfLiteStatus InvokeConv(TfLiteTensor* tensors, int tensors_size, - int output_length, TfLiteConvParams* conv_params, - TFLMRegistration registration, T* output_data) { - int inputs_array_data[] = {3, 0, 1, 2}; - TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); - int outputs_array_data[] = {1, 3}; - TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - - micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, - outputs_array, conv_params); - - const char* init_data = reinterpret_cast(conv_params); - TfLiteStatus status = runner.InitAndPrepare(init_data); - if (status != kTfLiteOk) { - return status; - } - return runner.Invoke(); -} - -template -TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size, - const T* expected_output_data, - int output_length, - TfLiteConvParams* conv_params, - TFLMRegistration registration, T* output_data, - float tolerance) { - TfLiteStatus status = InvokeConv(tensors, tensors_size, output_length, - conv_params, registration, output_data); - if (status != kTfLiteOk) { - return status; - } - for (int i = 0; i < output_length; ++i) { - TF_LITE_MICRO_EXPECT_NEAR(expected_output_data[i], output_data[i], - tolerance); - } - return kTfLiteOk; -} - -TfLiteStatus InvokeConv(TfLiteTensor* tensors, int tensors_size, - int output_length, TfLiteConvParams* conv_params, - TFLMRegistration registration, float* output_data) { - return InvokeConv(tensors, tensors_size, output_length, conv_params, - registration, output_data); -} - -TfLiteStatus InvokeConv(TfLiteTensor* tensors, int tensors_size, - int output_length, TfLiteConvParams* conv_params, - TFLMRegistration registration, int8_t* output_data) { - return InvokeConv(tensors, tensors_size, output_length, conv_params, - registration, output_data); -} - -TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size, - const float* expected_output_data, - int output_length, - TfLiteConvParams* conv_params, - TFLMRegistration registration, - float* output_data, float tolerance) { - return ValidateConvGoldens(tensors, tensors_size, expected_output_data, - output_length, conv_params, registration, - output_data, tolerance); -} - -TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size, - const int8_t* expected_output_data, - int output_length, - TfLiteConvParams* conv_params, - TFLMRegistration registration, - int8_t* output_data, float tolerance) { - return ValidateConvGoldens( - tensors, tensors_size, expected_output_data, output_length, conv_params, - registration, output_data, tolerance); -} - -TfLiteStatus TestConvFloat(int* input_dims_data, const float* input_data, - int* filter_dims_data, const float* filter_data, - int* bias_dims_data, const float* bias_data, - int* output_dims_data, - const float* expected_output_data, - TfLiteConvParams* conv_params, - TFLMRegistration registration, float* output_data) { - TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); - TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data); - TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); - TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); - const int output_dims_count = ElementCount(*output_dims); - constexpr int inputs_size = 3; - constexpr int outputs_size = 1; - constexpr int tensors_size = inputs_size + outputs_size; - TfLiteTensor tensors[tensors_size] = { - CreateTensor(input_data, input_dims), - CreateTensor(filter_data, filter_dims), 
- CreateTensor(bias_data, bias_dims), - CreateTensor(output_data, output_dims), - }; - - return ValidateConvGoldens(tensors, tensors_size, expected_output_data, - output_dims_count, conv_params, registration, - output_data); -} - template TfLiteStatus TestConvQuantizedPerChannel( int* input_dims_data, const float* input_data, T* input_quantized, diff --git a/tensorflow/lite/micro/kernels/fully_connected.cc b/tensorflow/lite/micro/kernels/fully_connected.cc index 65c83792e87..21d061ae430 100644 --- a/tensorflow/lite/micro/kernels/fully_connected.cc +++ b/tensorflow/lite/micro/kernels/fully_connected.cc @@ -1,4 +1,4 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -60,7 +60,7 @@ TfLiteStatus FullyConnectedPrepare(TfLiteContext* context, TfLiteNode* node) { (input->type == kTfLiteInt8 && (filter->type != kTfLiteInt8 && filter->type != kTfLiteInt4)) || (input->type == kTfLiteInt16 && filter->type != kTfLiteInt8)) { - MicroPrintf("Input type: %s with filter type : %s not supported.", + MicroPrintf("Input type: %s with filter type: %s not supported.", TfLiteTypeGetName(input->type), TfLiteTypeGetName(filter->type)); return kTfLiteError; @@ -79,6 +79,23 @@ TfLiteStatus FullyConnectedPrepare(TfLiteContext* context, TfLiteNode* node) { context, params->activation, input->type, input, filter, bias, output, data)); +#ifdef USE_TFLM_COMPRESSION + + // Compression scratch buffers. + // These will only be allocated if the tensor is compressed. + if (micro_context->IsTensorCompressed(node, kFullyConnectedWeightsTensor) && + filter->type == kTfLiteInt4) { + MicroPrintf("Compression not supported with INT4 tensors"); + return kTfLiteError; + } + data->weights_scratch_index = + micro_context->AllocateDecompressionScratchBuffer( + node, kFullyConnectedWeightsTensor); + data->bias_scratch_index = micro_context->AllocateDecompressionScratchBuffer( + node, kFullyConnectedBiasTensor); + +#endif // USE_TFLM_COMPRESSION + micro_context->DeallocateTempTfLiteTensor(input); micro_context->DeallocateTempTfLiteTensor(filter); if (bias != nullptr) { @@ -102,8 +119,19 @@ TfLiteStatus FullyConnectedEval(TfLiteContext* context, TfLiteNode* node) { TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, kFullyConnectedOutputTensor); - TFLITE_DCHECK(node->user_data != nullptr); +#ifdef USE_TFLM_COMPRESSION + + MicroContext* micro_context = GetMicroContext(context); + + const CompressionTensorData* weights_comp_td = + micro_context->GetTensorCompressionData(node, + kFullyConnectedWeightsTensor); + const CompressionTensorData* bias_comp_td = + micro_context->GetTensorCompressionData(node, kFullyConnectedBiasTensor); +#endif // USE_TFLM_COMPRESSION + + TFLITE_DCHECK(node->user_data != nullptr); const auto& data = *(static_cast(node->user_data)); @@ -115,9 +143,18 @@ TfLiteStatus FullyConnectedEval(TfLiteContext* context, TfLiteNode* node) { tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(micro_context, filter, + weights_comp_td, + data.weights_scratch_index), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(micro_context, bias, bias_comp_td, + data.bias_scratch_index), +#else // USE_TFLM_COMPRESSION tflite::micro::GetTensorData(filter), 
tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), +#endif // USE_TFLM_COMPRESSION tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output)); break; @@ -149,9 +186,18 @@ TfLiteStatus FullyConnectedEval(TfLiteContext* context, TfLiteNode* node) { tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(micro_context, filter, + weights_comp_td, + data.weights_scratch_index), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData( + micro_context, bias, bias_comp_td, data.bias_scratch_index), +#else // USE_TFLM_COMPRESSION tflite::micro::GetTensorData(filter), tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), +#endif // USE_TFLM_COMPRESSION tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output)); break; @@ -173,9 +219,18 @@ TfLiteStatus FullyConnectedEval(TfLiteContext* context, TfLiteNode* node) { tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(micro_context, filter, + weights_comp_td, + data.weights_scratch_index), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData( + micro_context, bias, bias_comp_td, data.bias_scratch_index), +#else // USE_TFLM_COMPRESSION tflite::micro::GetTensorData(filter), tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), +#endif // USE_TFLM_COMPRESSION tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output)); break; diff --git a/tensorflow/lite/micro/kernels/fully_connected.h b/tensorflow/lite/micro/kernels/fully_connected.h index 8308838ec6d..d7ea705964c 100644 --- a/tensorflow/lite/micro/kernels/fully_connected.h +++ b/tensorflow/lite/micro/kernels/fully_connected.h @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -46,6 +46,14 @@ struct OpDataFullyConnected { // tensor is of n-bit precision that cannot be easily processed by kernels. int filter_buffer_index; #endif + +#ifdef USE_TFLM_COMPRESSION + + // scratch buffers for compressed tensors + int weights_scratch_index; + int bias_scratch_index; + +#endif // USE_TFLM_COMPRESSION }; extern const int kFullyConnectedInputTensor; diff --git a/tensorflow/lite/micro/kernels/fully_connected_test.cc b/tensorflow/lite/micro/kernels/fully_connected_test.cc index 2ad132055b8..8c605fefbcb 100644 --- a/tensorflow/lite/micro/kernels/fully_connected_test.cc +++ b/tensorflow/lite/micro/kernels/fully_connected_test.cc @@ -1,4 +1,4 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -42,6 +42,20 @@ const float simple_weights_data[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2 }; +#ifdef USE_TFLM_COMPRESSION + +// compressed filter data for kBinQuant scheme +constexpr uint8_t kBinQuantFilterData[] = {0x01, 0x23, 0x45, 0x67, 0x89, + 0x01, 0x23, 0x45, 0x67, 0x89, + 0x01, 0x23, 0x45, 0x67, 0x89}; +constexpr float kBinQuantFilterValueTable[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; +constexpr int kBinQuantFilterBitWidth = 4; +// compressed bias data for kBinQuant scheme +constexpr uint8_t kBinQuantBiasData[] = {0x18}; +constexpr int kBinQuantBiasBitWidth = 2; + +#endif // USE_TFLM_COMPRESSION + // TODO(b/258710417): INT4 isn't currently supported on Hexagon. #if !defined(HEXAGON) const float simple_int4_weights_data[] = { @@ -241,11 +255,18 @@ const float representative_64x16_golden[] = { const int representative_64x16_output_size = 16; int representative_64x16_output_dims[] = {2, 1, 16}; -template +constexpr int kMaxTensors = 4; + +template TfLiteStatus ValidateFullyConnectedGoldens( TfLiteTensor* tensors, const int tensors_size, bool null_bias, const TfLiteFusedActivation activation, const float tolerance, - const int output_len, const T* golden, T* output_data) { + const int output_len, const T* golden, T* output_data +#ifdef USE_TFLM_COMPRESSION + , + const TestCompressionInfo* comp_info = nullptr +#endif // USE_TFLM_COMPRESSION +) { TfLiteFullyConnectedParams builtin_data = { activation, kTfLiteFullyConnectedWeightsFormatDefault, false, false, kTfLiteNoType}; @@ -272,10 +293,38 @@ TfLiteStatus ValidateFullyConnectedGoldens( TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); +#ifdef USE_TFLM_COMPRESSION + + TestCompressedList tcl; + const CompressedTensorList* comp_list_p = nullptr; + + if (comp_info != nullptr) { + TF_LITE_MICRO_EXPECT_EQ( + tcl.AddWeight(*comp_info, tensors[kFullyConnectedWeightsTensor], + kFullyConnectedWeightsTensor), + kTfLiteOk); + TF_LITE_MICRO_CHECK_FAIL(); + if (!null_bias) { + TF_LITE_MICRO_EXPECT_EQ( + tcl.AddBias(*comp_info, tensors[kFullyConnectedBiasTensor], + kFullyConnectedBiasTensor), + kTfLiteOk); + TF_LITE_MICRO_CHECK_FAIL(); + } + comp_list_p = tcl.GetCompressedTensorList(); + } + +#endif // USE_TFLM_COMPRESSION + const TFLMRegistration registration = Register_FULLY_CONNECTED(); micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, outputs_array, - reinterpret_cast(&builtin_data)); + reinterpret_cast(&builtin_data), nullptr +#ifdef USE_TFLM_COMPRESSION + , + comp_list_p +#endif // USE_TFLM_COMPRESSION + ); TfLiteStatus status = runner.InitAndPrepare(); if (status != kTfLiteOk) { @@ -293,11 +342,17 @@ TfLiteStatus ValidateFullyConnectedGoldens( return kTfLiteOk; } +template TfLiteStatus TestFullyConnectedFloat( int* input_dims_data, const float* input_data, int* weights_dims_data, const float* weights_data, int* bias_dims_data, const float* bias_data, const float* golden, int* output_dims_data, - TfLiteFusedActivation activation, float* output_data) { + TfLiteFusedActivation activation, float* output_data +#ifdef USE_TFLM_COMPRESSION + , + const TestCompressionInfo* comp_info = nullptr +#endif // USE_TFLM_COMPRESSION +) { TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); TfLiteIntArray* weights_dims = IntArrayFromInts(weights_dims_data); TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); @@ -305,16 +360,15 @@ TfLiteStatus TestFullyConnectedFloat( const int output_dims_count = ElementCount(*output_dims); 
bool null_bias = bias_data == nullptr ? true : false; - constexpr int array_size = 4; // Avoid variable length array warning. - const int inputs_size = bias_data == nullptr ? 2 : 3; + const int inputs_size = null_bias ? 2 : 3; constexpr int outputs_size = 1; const int tensors_size = inputs_size + outputs_size; - TfLiteTensor tensors[array_size]; + TfLiteTensor tensors[kMaxTensors]; tensors[0] = CreateTensor(input_data, input_dims); tensors[1] = CreateTensor(weights_data, weights_dims); - if (bias_data == nullptr) { + if (null_bias) { tensors[2] = CreateTensor(output_data, output_dims); } else { tensors[2] = CreateTensor(bias_data, bias_dims); @@ -323,7 +377,12 @@ TfLiteStatus TestFullyConnectedFloat( return ValidateFullyConnectedGoldens(tensors, tensors_size, null_bias, activation, 1e-4f, output_dims_count, - golden, output_data); + golden, output_data +#ifdef USE_TFLM_COMPRESSION + , + comp_info +#endif // USE_TFLM_COMPRESSION + ); } template @@ -345,7 +404,7 @@ TfLiteStatus TestFullyConnectedQuantized( bool null_bias = bias_data == nullptr ? true : false; constexpr int array_size = 4; // Avoid variable length array warning. - const int inputs_size = bias_data == nullptr ? 2 : 3; + const int inputs_size = null_bias ? 2 : 3; constexpr int outputs_size = 1; const int tensors_size = inputs_size + outputs_size; TfLiteTensor tensors[array_size]; @@ -355,7 +414,7 @@ TfLiteStatus TestFullyConnectedQuantized( tensors[1] = CreateQuantizedTensor( weights_data, weights_quantized, weights_dims, weights_scale, weights_zero_point, false, weights_packed_type); - if (bias_data == nullptr) { + if (null_bias) { tensors[2] = CreateQuantizedTensor(output_data, output_dims, output_scale, output_zero_point); } else { @@ -373,6 +432,68 @@ TfLiteStatus TestFullyConnectedQuantized( golden_quantized, output_data); } +#ifdef USE_TFLM_COMPRESSION + +template +TfLiteStatus TestFullyConnectedQuantizedCompressed( + int* input_dims_data, const float* input_data, TIO* input_quantized, + float input_scale, int input_zero_point, int* output_dims_data, + const float* expected_output_data, TIO* expected_output_quantized, + TIO* output_quantized, float output_scale, int output_zero_point, + const TfLiteFusedActivation activation, + const TestCompressionQuantizedInfo* comp_info) { + bool null_bias = comp_info->bias_data == nullptr ? 
true : false; + + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* filter_dims = IntArrayFromInts(comp_info->filter_dims_data); + TfLiteIntArray* bias_dims = IntArrayFromInts(comp_info->bias_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); + + TfLiteFloatArray* filter_scales = + FloatArrayFromFloats(comp_info->filter_scales); + TfLiteIntArray* filter_zero_points = + IntArrayFromInts(comp_info->filter_zero_points); + + TfLiteTensor filter_tensor = CreateQuantizedTensor( + comp_info->filter_compressed, filter_dims, filter_scales->data[0], + filter_zero_points->data[0], false, kTfLiteInt8); + SymmetricQuantize(comp_info->filter_data, comp_info->filter_value_table, + ElementCount(*filter_dims), filter_scales->data[0]); + + TfLiteTensor bias_tensor = {}; + if (!null_bias) { + bias_tensor = CreateQuantizedTensor(comp_info->bias_compressed, bias_dims, + input_scale * filter_scales->data[0], 0, + false, typeToTfLiteType()); + SymmetricQuantize(comp_info->bias_data, comp_info->bias_value_table, + ElementCount(*bias_dims), bias_tensor.params.scale); + } + + TfLiteTensor output_tensor = CreateQuantizedTensor( + output_quantized, output_dims, output_scale, output_zero_point); + + const int tensors_size = null_bias ? kMaxTensors - 1 : kMaxTensors; + TfLiteTensor tensors[kMaxTensors] = {}; + tensors[0] = CreateQuantizedTensor(input_data, input_quantized, input_dims, + input_scale, input_zero_point); + tensors[1] = filter_tensor; + if (null_bias) { + tensors[2] = output_tensor; + } else { + tensors[2] = bias_tensor; + tensors[3] = output_tensor; + } + + const int output_dims_count = ElementCount(*output_dims); + Quantize(expected_output_data, expected_output_quantized, output_dims_count, + output_scale, output_zero_point); + return ValidateFullyConnectedGoldens( + tensors, tensors_size, null_bias, activation, 0.0f, output_dims_count, + expected_output_quantized, output_quantized, comp_info); +} + +#endif // USE_TFLM_COMPRESSION + } // namespace } // namespace testing } // namespace tflite @@ -393,6 +514,37 @@ TF_LITE_MICRO_TEST(SimpleTest) { kTfLiteOk); } +#ifdef USE_TFLM_COMPRESSION + +TF_LITE_MICRO_TEST(SimpleTestCompressed) { + float output_data[tflite::testing::simple_output_size]; + + tflite::testing::TestCompressionInfo comp_info = {}; + comp_info.scheme = tflite::CompressionScheme::kBinQuant; + comp_info.filter_value_table = tflite::testing::kBinQuantFilterValueTable; + comp_info.filter_value_table_stride = + std::extent::value; + comp_info.filter_bit_width = tflite::testing::kBinQuantFilterBitWidth; + comp_info.bias_value_table = tflite::testing::simple_bias_data; + comp_info.bias_value_table_stride = + std::extent::value; + comp_info.bias_bit_width = tflite::testing::kBinQuantBiasBitWidth; + + TF_LITE_MICRO_EXPECT_EQ( + tflite::testing::TestFullyConnectedFloat( + tflite::testing::simple_input_dims, + tflite::testing::simple_input_data, + tflite::testing::simple_weights_dims, + reinterpret_cast(tflite::testing::kBinQuantFilterData), + tflite::testing::simple_bias_dims, + reinterpret_cast(tflite::testing::kBinQuantBiasData), + tflite::testing::simple_golden, tflite::testing::simple_output_dims, + kTfLiteActNone, output_data, &comp_info), + kTfLiteOk); +} + +#endif // USE_TFLM_COMPRESSION + TF_LITE_MICRO_TEST(SimpleTestNullBias) { float output_data[tflite::testing::simple_output_size]; TF_LITE_MICRO_EXPECT_EQ( @@ -434,6 +586,54 @@ TF_LITE_MICRO_TEST(SimpleTestQuantizedInt8) { kTfLiteOk); } +#ifdef USE_TFLM_COMPRESSION + 
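For reference while reviewing the compressed fully_connected tests that follow, here is an illustrative sketch (not part of the patch) of how the kBinQuant test vectors defined earlier in this file decode, assuming the 4-bit, big-endian index packing described in compression/metadata.fbs and assuming simple_weights_size is 30 (3 x 10):

    // Illustrative only: decode kBinQuantFilterData (4-bit LUT indices,
    // high nibble first) through kBinQuantFilterValueTable.
    float decoded_weights[30];
    for (int i = 0; i < 30; ++i) {
      const uint8_t byte = tflite::testing::kBinQuantFilterData[i / 2];
      const uint8_t lut_index = (i % 2 == 0) ? (byte >> 4) : (byte & 0x0F);
      decoded_weights[i] =
          tflite::testing::kBinQuantFilterValueTable[lut_index];
    }
    // Expected result: decoded_weights matches simple_weights_data
    // (1..10 repeated three times). kBinQuantBiasData (0x18, 2-bit indices)
    // decodes the same way to the bias values {1, 2, 3}.
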
+TF_LITE_MICRO_TEST(SimpleTestQuantizedInt8Compressed) { + const float input_scale = 1.0f; + const int input_zero_point = -1; + constexpr float weights_scale[] = {1, 1.0f}; + constexpr int weights_zero_point[] = {1, 0}; + const float output_scale = 0.5f; + const int output_zero_point = -1; + + int8_t input_quantized[tflite::testing::simple_input_size]; + int8_t weights_quantized[tflite::testing::simple_weights_size]; + int32_t bias_quantized[tflite::testing::simple_output_size]; + int8_t golden_quantized[tflite::testing::simple_output_size]; + int8_t output_data[tflite::testing::simple_output_size]; + + tflite::testing::TestCompressionQuantizedInfo comp_info = {}; + comp_info.scheme = tflite::CompressionScheme::kBinQuant; + comp_info.filter_value_table = weights_quantized; + comp_info.filter_value_table_stride = + std::extent::value; + comp_info.filter_bit_width = tflite::testing::kBinQuantFilterBitWidth; + comp_info.filter_compressed = tflite::testing::kBinQuantFilterData; + comp_info.filter_data = tflite::testing::kBinQuantFilterValueTable; + comp_info.filter_dims_data = tflite::testing::simple_weights_dims; + comp_info.filter_scales = weights_scale; + comp_info.filter_zero_points = weights_zero_point; + comp_info.bias_value_table = bias_quantized; + comp_info.bias_value_table_stride = + std::extent::value; + comp_info.bias_bit_width = tflite::testing::kBinQuantBiasBitWidth; + comp_info.bias_compressed = tflite::testing::kBinQuantBiasData; + comp_info.bias_data = tflite::testing::simple_bias_data; + comp_info.bias_dims_data = tflite::testing::simple_bias_dims; + // bias_scales and bias_zero_points are not used + + TF_LITE_MICRO_EXPECT_EQ( + tflite::testing::TestFullyConnectedQuantizedCompressed( + tflite::testing::simple_input_dims, + tflite::testing::simple_input_data, input_quantized, input_scale, + input_zero_point, tflite::testing::simple_output_dims, + tflite::testing::simple_golden, golden_quantized, output_data, + output_scale, output_zero_point, kTfLiteActNone, &comp_info), + kTfLiteOk); +} + +#endif // USE_TFLM_COMPRESSION + #if !defined(HEXAGON) TF_LITE_MICRO_TEST(SimpleTestQuantizedInt16) { const float input_scale = 128.0 / 65536; @@ -443,7 +643,6 @@ TF_LITE_MICRO_TEST(SimpleTestQuantizedInt16) { const float output_scale = 128.0 / 65536; const int output_zero_point = 0; - const float simple_golden[] = {24, 25, 26, 58, 59, 60}; int16_t input_quantized[tflite::testing::simple_input_size]; int8_t weights_quantized[tflite::testing::simple_weights_size]; int64_t bias_quantized[tflite::testing::simple_output_size]; @@ -457,12 +656,62 @@ TF_LITE_MICRO_TEST(SimpleTestQuantizedInt16) { input_zero_point, tflite::testing::simple_weights_dims, tflite::testing::simple_weights_data, weights_quantized, weights_scale, weights_zero_point, tflite::testing::simple_bias_dims, - tflite::testing::simple_bias_data, bias_quantized, simple_golden, - golden_quantized, tflite::testing::simple_output_dims, output_scale, - output_zero_point, kTfLiteActNone, output_data), + tflite::testing::simple_bias_data, bias_quantized, + tflite::testing::simple_golden, golden_quantized, + tflite::testing::simple_output_dims, output_scale, output_zero_point, + kTfLiteActNone, output_data), kTfLiteOk); } -#endif + +#ifdef USE_TFLM_COMPRESSION + +TF_LITE_MICRO_TEST(SimpleTestQuantizedInt16Compressed) { + const float input_scale = 128.0 / 65536; + const int input_zero_point = 0; + constexpr float weights_scale[] = {1, 1.0f}; + constexpr int weights_zero_point[] = {1, 0}; + const float output_scale = 128.0 / 65536; + const 
int output_zero_point = 0; + + int16_t input_quantized[tflite::testing::simple_input_size]; + int8_t weights_quantized[tflite::testing::simple_weights_size]; + int64_t bias_quantized[tflite::testing::simple_output_size]; + int16_t golden_quantized[tflite::testing::simple_output_size]; + int16_t output_data[tflite::testing::simple_output_size]; + + tflite::testing::TestCompressionQuantizedInfo comp_info = {}; + comp_info.scheme = tflite::CompressionScheme::kBinQuant; + comp_info.filter_value_table = weights_quantized; + comp_info.filter_value_table_stride = + std::extent::value; + comp_info.filter_bit_width = tflite::testing::kBinQuantFilterBitWidth; + comp_info.filter_compressed = tflite::testing::kBinQuantFilterData; + comp_info.filter_data = tflite::testing::kBinQuantFilterValueTable; + comp_info.filter_dims_data = tflite::testing::simple_weights_dims; + comp_info.filter_scales = weights_scale; + comp_info.filter_zero_points = weights_zero_point; + comp_info.bias_value_table = bias_quantized; + comp_info.bias_value_table_stride = + std::extent::value; + comp_info.bias_bit_width = tflite::testing::kBinQuantBiasBitWidth; + comp_info.bias_compressed = tflite::testing::kBinQuantBiasData; + comp_info.bias_data = tflite::testing::simple_bias_data; + comp_info.bias_dims_data = tflite::testing::simple_bias_dims; + // bias_scales and bias_zero_points are not used + + TF_LITE_MICRO_EXPECT_EQ( + tflite::testing::TestFullyConnectedQuantizedCompressed( + tflite::testing::simple_input_dims, + tflite::testing::simple_input_data, input_quantized, input_scale, + input_zero_point, tflite::testing::simple_output_dims, + tflite::testing::simple_golden, golden_quantized, output_data, + output_scale, output_zero_point, kTfLiteActNone, &comp_info), + kTfLiteOk); +} + +#endif // USE_TFLM_COMPRESSION + +#endif // !defined(HEXAGON) TF_LITE_MICRO_TEST(SimpleTest4DInputQuantizedInt8) { const float input_scale = 1.0f; diff --git a/tensorflow/lite/micro/kernels/kernel_runner.cc b/tensorflow/lite/micro/kernels/kernel_runner.cc index 602778d7c50..79824efe5de 100644 --- a/tensorflow/lite/micro/kernels/kernel_runner.cc +++ b/tensorflow/lite/micro/kernels/kernel_runner.cc @@ -1,4 +1,4 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -18,7 +18,6 @@ limitations under the License. 
#include "tensorflow/lite/micro/arena_allocator/single_arena_buffer_allocator.h" #include "tensorflow/lite/micro/micro_arena_constants.h" #include "tensorflow/lite/micro/micro_log.h" -#include "tensorflow/lite/micro/test_helpers.h" namespace tflite { namespace micro { @@ -38,12 +37,22 @@ KernelRunner::KernelRunner(const TFLMRegistration& registration, TfLiteTensor* tensors, int tensors_size, TfLiteIntArray* inputs, TfLiteIntArray* outputs, const void* builtin_data, - TfLiteIntArray* intermediates) + TfLiteIntArray* intermediates +#ifdef USE_TFLM_COMPRESSION + , + const CompressedTensorList* compressed_tensors +#endif // USE_TFLM_COMPRESSION + ) : registration_(registration), allocator_(SingleArenaBufferAllocator::Create(kKernelRunnerBuffer_, kKernelRunnerBufferSize_)), mock_micro_graph_(allocator_), - fake_micro_context_(tensors, allocator_, &mock_micro_graph_) { + fake_micro_context_(tensors, allocator_, &mock_micro_graph_ +#ifdef USE_TFLM_COMPRESSION + , + compressed_tensors +#endif // USE_TFLM_COMPRESSION + ) { // Prepare TfLiteContext: context_.impl_ = static_cast(&fake_micro_context_); context_.ReportError = MicroContextReportOpError; diff --git a/tensorflow/lite/micro/kernels/kernel_util.h b/tensorflow/lite/micro/kernels/kernel_util.h index 977ed9563e1..5ef4bac85c9 100644 --- a/tensorflow/lite/micro/kernels/kernel_util.h +++ b/tensorflow/lite/micro/kernels/kernel_util.h @@ -95,8 +95,6 @@ const T* GetOptionalTensorData(const TfLiteEvalTensor* tensor) { // Overloads existing GetTensorData. If not compressed, this will return // tensor->data. -// -// TODO(ddavis-2015): make micro_context a const pointer template const T* GetTensorData(MicroContext* micro_context, const TfLiteEvalTensor* tensor, diff --git a/tensorflow/lite/micro/kernels/transpose_conv.cc b/tensorflow/lite/micro/kernels/transpose_conv.cc index ea0efae0607..7d65dc3de7c 100644 --- a/tensorflow/lite/micro/kernels/transpose_conv.cc +++ b/tensorflow/lite/micro/kernels/transpose_conv.cc @@ -1,4 +1,4 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -27,30 +27,26 @@ limitations under the License. #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/padding.h" #include "tensorflow/lite/micro/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/transpose_conv.h" #include "tensorflow/lite/micro/micro_log.h" namespace tflite { namespace { -// For the TfLite transpose_conv implementation, input tensor 0 corresponds to -// the OutputShapeTensor. However, since TFLM does not support dynamic tensors, -// the TFLM implementation ignores input tensor 0 and the only inputs we care -// about are kFilterTensor, kInputTensor and kBiasTensor. -constexpr int kFilterTensor = 1; -constexpr int kInputTensor = 2; -constexpr int kBiasTensor = 3; -constexpr int kOutputTensor = 0; - -// Conv is quantized along dimension 0: -// https://www.tensorflow.org/lite/performance/quantization_spec -constexpr int kConvQuantizedDimension = 0; - struct OpData { ConvParams params; // A scratch buffer is required for quantized implementations. int scratch_buffer_index; +#ifdef USE_TFLM_COMPRESSION + + // scratch buffers for compressed tensors + int filter_scratch_index; + int bias_scratch_index; + +#endif // USE_TFLM_COMPRESSION + // Index to the converted 64-bit bias buffer from 16-bit bias. 
This is // required to handle 16x8 transpose convolutions where a 16-bit bias is // provided, whereas the kernel expects 64-bit biases. @@ -102,17 +98,17 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, MicroContext* micro_context = GetMicroContext(context); TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kInputTensor); + micro_context->AllocateTempInputTensor(node, kTransposeConvInputTensor); TF_LITE_ENSURE(context, input != nullptr); - TfLiteTensor* filter = - micro_context->AllocateTempInputTensor(node, kFilterTensor); + TfLiteTensor* filter = micro_context->AllocateTempInputTensor( + node, kTransposeConvFilterTensor); TF_LITE_ENSURE(context, filter != nullptr); TfLiteTensor* bias = - micro_context->AllocateTempInputTensor(node, kBiasTensor); - TfLiteTensor* output = - micro_context->AllocateTempOutputTensor(node, kOutputTensor); + micro_context->AllocateTempInputTensor(node, kTransposeConvBiasTensor); + TfLiteTensor* output = micro_context->AllocateTempOutputTensor( + node, kTransposeConvOutputTensor); TF_LITE_ENSURE(context, output != nullptr); - int output_channels = filter->dims->data[kConvQuantizedDimension]; + int output_channels = filter->dims->data[kTransposeConvQuantizedDimension]; TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams( context, input, filter, bias, output, kTfLiteActNone, @@ -164,13 +160,13 @@ TfLiteStatus TransposeConvPrepare(TfLiteContext* context, TfLiteNode* node) { MicroContext* micro_context = GetMicroContext(context); TfLiteTensor* output = - micro_context->AllocateTempOutputTensor(node, kOutputTensor); + micro_context->AllocateTempOutputTensor(node, kTransposeConvOutputTensor); TF_LITE_ENSURE(context, output != nullptr); TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kInputTensor); + micro_context->AllocateTempInputTensor(node, kTransposeConvInputTensor); TF_LITE_ENSURE(context, input != nullptr); TfLiteTensor* filter = - micro_context->AllocateTempInputTensor(node, kFilterTensor); + micro_context->AllocateTempInputTensor(node, kTransposeConvFilterTensor); TF_LITE_ENSURE(context, filter != nullptr); TF_LITE_ENSURE_MSG( @@ -186,7 +182,7 @@ TfLiteStatus TransposeConvPrepare(TfLiteContext* context, TfLiteNode* node) { const int filter_height = SizeOfDimension(filter, 1); // Dynamically allocate per-channel quantization parameters. 
- const int num_channels = filter->dims->data[kConvQuantizedDimension]; + const int num_channels = filter->dims->data[kTransposeConvQuantizedDimension]; data->per_channel_output_multiplier = static_cast(context->AllocatePersistentBuffer( context, num_channels * sizeof(int32_t))); @@ -223,10 +219,10 @@ TfLiteStatus TransposeConvPrepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE(context, affine_quantization->scale); TF_LITE_ENSURE(context, affine_quantization->zero_point); - TF_LITE_ENSURE(context, - affine_quantization->scale->size == 1 || - affine_quantization->scale->size == - filter->dims->data[kConvQuantizedDimension]); + TF_LITE_ENSURE( + context, affine_quantization->scale->size == 1 || + affine_quantization->scale->size == + filter->dims->data[kTransposeConvQuantizedDimension]); TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size, affine_quantization->zero_point->size); } @@ -244,6 +240,18 @@ TfLiteStatus TransposeConvPrepare(TfLiteContext* context, TfLiteNode* node) { data->params.stride_width = params->stride_width; data->params.stride_height = params->stride_height; +#ifdef USE_TFLM_COMPRESSION + + // Compression scratch buffers. + // These will only be allocated if the tensor is compressed. + data->filter_scratch_index = + micro_context->AllocateDecompressionScratchBuffer( + node, kTransposeConvFilterTensor); + data->bias_scratch_index = micro_context->AllocateDecompressionScratchBuffer( + node, kTransposeConvBiasTensor); + +#endif // USE_TFLM_COMPRESSION + micro_context->DeallocateTempTfLiteTensor(output); micro_context->DeallocateTempTfLiteTensor(input); micro_context->DeallocateTempTfLiteTensor(filter); @@ -252,15 +260,26 @@ TfLiteStatus TransposeConvPrepare(TfLiteContext* context, TfLiteNode* node) { TfLiteStatus TransposeConvEval(TfLiteContext* context, TfLiteNode* node) { const TfLiteEvalTensor* input = - tflite::micro::GetEvalInput(context, node, kInputTensor); + tflite::micro::GetEvalInput(context, node, kTransposeConvInputTensor); const TfLiteEvalTensor* filter = - tflite::micro::GetEvalInput(context, node, kFilterTensor); + tflite::micro::GetEvalInput(context, node, kTransposeConvFilterTensor); const TfLiteEvalTensor* bias = (NumInputs(node) == 4) - ? tflite::micro::GetEvalInput(context, node, kBiasTensor) + ? 
tflite::micro::GetEvalInput(context, node, kTransposeConvBiasTensor) : nullptr; TfLiteEvalTensor* output = - tflite::micro::GetEvalOutput(context, node, kOutputTensor); + tflite::micro::GetEvalOutput(context, node, kTransposeConvOutputTensor); + +#ifdef USE_TFLM_COMPRESSION + + MicroContext* micro_context = GetMicroContext(context); + + const CompressionTensorData* filter_comp_td = + micro_context->GetTensorCompressionData(node, kTransposeConvFilterTensor); + const CompressionTensorData* bias_comp_td = + micro_context->GetTensorCompressionData(node, kTransposeConvBiasTensor); + +#endif // USE_TFLM_COMPRESSION TFLITE_DCHECK(node->user_data != nullptr); const OpData& data = *(static_cast(node->user_data)); @@ -280,9 +299,17 @@ TfLiteStatus TransposeConvEval(TfLiteContext* context, TfLiteNode* node) { op_params, tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData( + micro_context, filter, filter_comp_td, data.filter_scratch_index), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(micro_context, bias, bias_comp_td, + data.bias_scratch_index), +#else // USE_TFLM_COMPRESSION tflite::micro::GetTensorData(filter), tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), +#endif // USE_TFLM_COMPRESSION tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output), tflite::micro::GetTensorShape(nullptr), nullptr); @@ -296,9 +323,17 @@ TfLiteStatus TransposeConvEval(TfLiteContext* context, TfLiteNode* node) { data.per_channel_output_shift, tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData( + micro_context, filter, filter_comp_td, data.filter_scratch_index), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData( + micro_context, bias, bias_comp_td, data.bias_scratch_index), +#else // USE_TFLM_COMPRESSION tflite::micro::GetTensorData(filter), tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), +#endif // USE_TFLM_COMPRESSION tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output), tflite::micro::GetTensorShape(nullptr), nullptr, scratch_buffer); @@ -311,16 +346,29 @@ TfLiteStatus TransposeConvEval(TfLiteContext* context, TfLiteNode* node) { auto* bias_converted_buffer = static_cast(context->GetScratchBuffer( context, data.bias_converted_buffer_index)); + const int16_t* const bias_int16_data = +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData( + micro_context, bias, bias_comp_td, data.bias_scratch_index); +#else // USE_TFLM_COMPRESSION + static_cast(bias->data.data); +#endif // USE_TFLM_COMPRESSION for (int i = 0; i < tflite::micro::GetTensorShape(bias).FlatSize(); i++) { - bias_converted_buffer[i] = bias->data.i16[i]; + bias_converted_buffer[i] = bias_int16_data[i]; } reference_integer_ops::TransposeConv( data.params, data.per_channel_output_multiplier, data.per_channel_output_shift, tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(micro_context, filter, + filter_comp_td, + data.filter_scratch_index), +#else // USE_TFLM_COMPRESSION tflite::micro::GetTensorData(filter), +#endif // USE_TFLM_COMPRESSION tflite::micro::GetTensorShape(bias), bias_converted_buffer, tflite::micro::GetTensorShape(output), 
tflite::micro::GetTensorData(output), @@ -331,9 +379,18 @@ TfLiteStatus TransposeConvEval(TfLiteContext* context, TfLiteNode* node) { data.per_channel_output_shift, tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(micro_context, filter, + filter_comp_td, + data.filter_scratch_index), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData( + micro_context, bias, bias_comp_td, data.bias_scratch_index), +#else // USE_TFLM_COMPRESSION tflite::micro::GetTensorData(filter), tflite::micro::GetTensorShape(bias), - tflite::micro::GetOptionalTensorData(bias), + tflite::micro::GetOptionalTensorData(bias), +#endif // USE_TFLM_COMPRESSION tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output), tflite::micro::GetTensorShape(nullptr), nullptr, scratch_buffer); diff --git a/tensorflow/lite/micro/kernels/transpose_conv.h b/tensorflow/lite/micro/kernels/transpose_conv.h index 3a99ccbf847..ec0416e067f 100644 --- a/tensorflow/lite/micro/kernels/transpose_conv.h +++ b/tensorflow/lite/micro/kernels/transpose_conv.h @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -23,6 +23,19 @@ limitations under the License. namespace tflite { +// For the TfLite transpose_conv implementation, input tensor 0 corresponds to +// the OutputShapeTensor. However, since TFLM does not support dynamic tensors, +// the TFLM implementation ignores input tensor 0 and the only inputs we care +// about are kFilterTensor, kInputTensor and kBiasTensor. +constexpr int kTransposeConvFilterTensor = 1; +constexpr int kTransposeConvInputTensor = 2; +constexpr int kTransposeConvBiasTensor = 3; +constexpr int kTransposeConvOutputTensor = 0; + +// Conv is quantized along dimension 0: +// https://www.tensorflow.org/lite/performance/quantization_spec +constexpr int kTransposeConvQuantizedDimension = 0; + // This is the most generic TFLMRegistration. The actual supported types // may still be target dependent. The only requirement is that every // implementation (reference or optimized) must define this function. diff --git a/tensorflow/lite/micro/kernels/transpose_conv_test.cc b/tensorflow/lite/micro/kernels/transpose_conv_test.cc index 49d2c90f439..e9716794229 100644 --- a/tensorflow/lite/micro/kernels/transpose_conv_test.cc +++ b/tensorflow/lite/micro/kernels/transpose_conv_test.cc @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,9 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ +#include "tensorflow/lite/micro/kernels/transpose_conv.h" + +#include + #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/micro/kernels/conv_test.h" #include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/micro_utils.h" #include "tensorflow/lite/micro/test_helpers.h" @@ -47,20 +50,119 @@ static const float kGoldenData[kOutputElements] = { 184, 412, 568, 528, 678, 1347, 1689, 1434, 1494, 2715, 3057, 2442, 1968, 3352, 3652, 2760}; +#ifdef USE_TFLM_COMPRESSION + +constexpr size_t kTransposeConvMaxTensors = 5; +constexpr size_t kTransposeConvMaxInputTensors = 4; + +// compressed filter data for kBinQuant scheme, matches kFilterData +constexpr uint8_t kBinQuantFilterData[] = {0x00, 0x44, 0x32, 0x14, 0xC7, 0x42, + 0x54, 0xB6, 0x35, 0xCF, 0x84, 0x40}; +constexpr int kBinQuantFilterBitWidth = 5; +// compressed bias data for kBinQuant scheme, matches kBiasData +constexpr uint8_t kBinQuantBiasData[] = {0x00}; +constexpr int kBinQuantBiasBitWidth = 1; + +// Common inputs and outputs (quantized single channel). +// data from TfLite test: SimpleBiasTestQuantizedPerChannelSingleChannel +static int kInputShapeQ1[] = {4, 1, 4, 4, 1}; +static constexpr float kInputDataQ1[] = {1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16}; +constexpr size_t kInputElementsQ1 = std::extent::value; + +constexpr int kFilterNumChannelsQ1 = 1; +static int kFilterShapeQ1[] = {4, 1, 3, 3, 1}; +static constexpr float kFilterDataQ1[] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; +constexpr size_t kFilterElementsQ1 = + std::extent::value; + +static int kBiasShapeQ1[] = {1, 1}; +static constexpr float kBiasDataQ1[] = {1}; +constexpr size_t kBiasElementsQ1 = std::extent::value; + +static int kOutputShapeQ1[] = {4, 1, 4, 4, 1}; +static constexpr float kGoldenDataQ1[] = { + 30, 62, 84, 76, 100, 194, 238, 200, 208, 372, 418, 330, 264, 446, 486, 366}; +constexpr int kOutputElementsQ1 = std::extent::value; + +// compressed filter data for kBinQuant scheme, matches kFilterDataQ1 +constexpr uint8_t kBinQuantFilterDataQ1[] = {0x01, 0x23, 0x45, 0x67, 0x80}; +constexpr int kBinQuantFilterBitWidthQ1 = 4; +// compressed bias data for kBinQuant scheme, matches kBiasDataQ1 +constexpr uint8_t kBinQuantBiasDataQ1[] = {0x00}; +constexpr int kBinQuantBiasBitWidthQ1 = 1; + +// Common inputs and outputs (quantized multi channel). 
+// data from TfLite test: SimpleBiasTestQuantizedPerChannel16x8Bias64 +static int kInputShapeQ2[] = {4, 1, 2, 3, 2}; +static constexpr float kInputDataQ2[] = { + // [1 * 2 * 3 * 2] as [batch, y, x, input_channel] + 3, 2, // batch = 0, y = 0, x = 0 + 1, -1, // batch = 0, y = 0, x = 1 + -2, -3, // batch = 0, y = 0, x = 2 + 4, 3, // batch = 0, y = 1, x = 0 + 2, -2, // batch = 0, y = 1, x = 1 + -3, -4, // batch = 0, y = 1, x = 2 +}; +constexpr size_t kInputElementsQ2 = std::extent::value; + +constexpr int kFilterNumChannelsQ2 = 2; +static int kFilterShapeQ2[] = {4, 2, 2, 2, 2}; +static constexpr float kFilterDataQ2[] = { + // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel] + 1, 2, // out channel = 0, y = 0, x = 0 + 3, 4, // out channel = 0, y = 0, x = 1 + 3, 4, // out channel = 0, y = 1, x = 0 + 5, 6, // out channel = 0, y = 1, x = 1 + 7, 8, // out channel = 1, y = 0, x = 0 + 5, 6, // out channel = 1, y = 0, x = 1 + 3, 4, // out channel = 1, y = 1, x = 0 + 1, 2, // out channel = 1, y = 1, x = 1 +}; +constexpr size_t kFilterElementsQ2 = + std::extent::value; + +static int kBiasShapeQ2[] = {1, 2}; +static constexpr float kBiasDataQ2[] = {3, -2}; +constexpr size_t kBiasElementsQ2 = std::extent::value; + +static int kOutputShapeQ2[] = {4, 1, 2, 3, 2}; +static constexpr float kGoldenDataQ2[] = {10, 35, 19, 24, -6, -41, + 30, 64, 51, 40, -29, -64}; +constexpr int kOutputElementsQ2 = std::extent::value; + +// compressed filter data for kBinQuant scheme, matches kFilterDataQ2 +constexpr uint8_t kBinQuantFilterDataQ2[] = {0x05, 0x34, 0xE5, + 0xDE, 0x54, 0xC1}; +constexpr float kBinQuantFilterValueTableQ2[] = {1, 2, 3, 4, 5, 6, 0, 0, + 1, 2, 3, 4, 5, 6, 7, 8}; +constexpr int kBinQuantFilterBitWidthQ2 = 3; +// compressed bias data for kBinQuant scheme, matches kBiasDataQ2 +constexpr uint8_t kBinQuantBiasDataQ2[] = {0x00}; +constexpr int kBinQuantBiasBitWidthQ2 = 1; + +#endif // USE_TFLM_COMPRESSION + // Transpose conv uses TfLiteConvParams. 
-static TfLiteConvParams common_conv_params = {kTfLitePaddingSame, // padding - 1, // stride_width - 1, // stride_height - kTfLiteActNone, - 1, - 1, - kTfLiteNoType}; +static const TfLiteConvParams common_conv_params = { + kTfLitePaddingSame, // padding + 1, // stride_width + 1, // stride_height + kTfLiteActNone, + 1, + 1, + kTfLiteNoType}; template -TfLiteStatus InvokeTransposeConv(TfLiteTensor* tensors, int tensors_size, - int output_length, - TfLiteConvParams* conv_params, - T* output_data) { +TfLiteStatus InvokeTransposeConv( + TfLiteTensor* tensors, int tensors_size, int output_length, + const TfLiteConvParams* conv_params, T* output_data +#ifdef USE_TFLM_COMPRESSION + , + const CompressedTensorList* comp_list_p = nullptr +#endif // USE_TFLM_COMPRESSION +) { + // TODO(b/358151309): support optional bias tensor int inputs_array_data[] = {4, 0, 1, 2, 3}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 4}; @@ -68,7 +170,12 @@ TfLiteStatus InvokeTransposeConv(TfLiteTensor* tensors, int tensors_size, const TFLMRegistration registration = tflite::Register_TRANSPOSE_CONV(); micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, - outputs_array, conv_params); + outputs_array, conv_params +#ifdef USE_TFLM_COMPRESSION + , + nullptr, comp_list_p +#endif // USE_TFLM_COMPRESSION + ); const char* init_data = reinterpret_cast(conv_params); TfLiteStatus status = runner.InitAndPrepare(init_data); @@ -78,15 +185,44 @@ TfLiteStatus InvokeTransposeConv(TfLiteTensor* tensors, int tensors_size, return runner.Invoke(); } -template -TfLiteStatus ValidateTransposeConvGoldens(TfLiteTensor* tensors, - int tensors_size, - const T* expected_output_data, - int output_length, - TfLiteConvParams* conv_params, - T* output_data, float tolerance) { +template +TfLiteStatus ValidateTransposeConvGoldens( + TfLiteTensor* tensors, int tensors_size, const T* expected_output_data, + int output_length, const TfLiteConvParams* conv_params, T* output_data, + float tolerance = 1e-5f +#ifdef USE_TFLM_COMPRESSION + , + const TestCompressionInfo* comp_info = nullptr +#endif // USE_TFLM_COMPRESSION +) { +#ifdef USE_TFLM_COMPRESSION + + TestCompressedList tcl; + const CompressedTensorList* comp_list_p = nullptr; + + if (comp_info != nullptr) { + TF_LITE_MICRO_EXPECT_EQ( + tcl.AddWeight(*comp_info, tensors[kTransposeConvFilterTensor], + kTransposeConvFilterTensor), + kTfLiteOk); + TF_LITE_MICRO_CHECK_FAIL(); + TF_LITE_MICRO_EXPECT_EQ( + tcl.AddBias(*comp_info, tensors[kTransposeConvBiasTensor], + kTransposeConvBiasTensor), + kTfLiteOk); + TF_LITE_MICRO_CHECK_FAIL(); + comp_list_p = tcl.GetCompressedTensorList(); + } + +#endif // USE_TFLM_COMPRESSION + TfLiteStatus status = InvokeTransposeConv( - tensors, tensors_size, output_length, conv_params, output_data); + tensors, tensors_size, output_length, conv_params, output_data +#ifdef USE_TFLM_COMPRESSION + , + comp_list_p +#endif // USE_TFLM_COMPRESSION + ); if (status != kTfLiteOk) { return status; } @@ -97,11 +233,17 @@ TfLiteStatus ValidateTransposeConvGoldens(TfLiteTensor* tensors, return kTfLiteOk; } +template TfLiteStatus TestTransposeConvFloat( int* input_dims_data, const float* input_data, int* filter_dims_data, const float* filter_data, int* bias_dims_data, const float* bias_data, int* output_dims_data, const float* expected_output_data, - TfLiteConvParams* conv_params, float* output_data) { + const TfLiteConvParams* conv_params, float* output_data +#ifdef USE_TFLM_COMPRESSION + , + const 
TestCompressionInfo* comp_info = nullptr +#endif // USE_TFLM_COMPRESSION +) { TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data); TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); @@ -125,7 +267,12 @@ TfLiteStatus TestTransposeConvFloat( return ValidateTransposeConvGoldens(tensors, tensors_size, expected_output_data, output_dims_count, - conv_params, output_data, 0.001f); + conv_params, output_data +#ifdef USE_TFLM_COMPRESSION + , + 1e-5, comp_info +#endif // USE_TFLM_COMPRESSION + ); } TfLiteStatus TestTransposeConvQuantized( @@ -135,8 +282,8 @@ TfLiteStatus TestTransposeConvQuantized( int* bias_dims_data, const float* bias_data, int32_t* bias_quantized, float* bias_scales, int* bias_zero_points, int* output_dims_data, const float* expected_output_data, int8_t* expected_output_quantized, - float output_scale, int output_zero_point, TfLiteConvParams* conv_params, - int8_t* output_data) { + float output_scale, int output_zero_point, + const TfLiteConvParams* conv_params, int8_t* output_data) { TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data); TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); @@ -181,8 +328,8 @@ TfLiteStatus TestTransposeConvQuantized( int* bias_dims_data, const float* bias_data, T* bias_quantized, float* bias_scales, int* bias_zero_points, int* output_dims_data, const float* expected_output_data, int16_t* expected_output_quantized, - float output_scale, int output_zero_point, TfLiteConvParams* conv_params, - int16_t* output_data) { + float output_scale, int output_zero_point, + const TfLiteConvParams* conv_params, int16_t* output_data) { TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data); TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); @@ -221,6 +368,76 @@ TfLiteStatus TestTransposeConvQuantized( conv_params, output_data, 4.0f); } +#ifdef USE_TFLM_COMPRESSION + +template +TfLiteStatus TestTransposeConvQuantizedCompressed( + int* input_dims_data, const float* input_data, TIO* input_quantized, + float input_scale, int input_zero_point, int* output_dims_data, + const float* expected_output_data, TIO* expected_output_quantized, + TIO* output_quantized, float output_scale, int output_zero_point, + const TfLiteConvParams* conv_params, const unsigned int tolerance, + const TestCompressionQuantizedInfo* comp_info) { + // TODO(b/358151309): account for optional bias tensor + // bool null_bias = comp_info->bias_data == nullptr ? 
true : false; + + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* filter_dims = IntArrayFromInts(comp_info->filter_dims_data); + TfLiteIntArray* bias_dims = IntArrayFromInts(comp_info->bias_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); + + TfLiteFloatArray* filter_scales = + FloatArrayFromFloats(comp_info->filter_scales); + TfLiteIntArray* filter_zero_points = + IntArrayFromInts(comp_info->filter_zero_points); + TfLiteFloatArray* bias_scales = FloatArrayFromFloats(comp_info->bias_scales); + TfLiteIntArray* bias_zero_points = + IntArrayFromInts(comp_info->bias_zero_points); + + TfLiteAffineQuantization filter_quant = {}; + TfLiteTensor filter_tensor = CreatePerChannelQuantizedTensor( + comp_info->filter_compressed, filter_dims, filter_scales, + filter_zero_points, &filter_quant, kTransposeConvQuantizedDimension, + false /* is_variable */, kTfLiteInt8); + SymmetricPerChannelQuantize( + comp_info->filter_data, comp_info->filter_value_table, + ElementCount(*filter_dims), filter_scales->size, filter_scales->data); + + TfLiteAffineQuantization bias_quant = {}; + TfLiteTensor bias_tensor = CreatePerChannelQuantizedBiasTensor( + comp_info->bias_compressed, bias_dims, input_scale, filter_scales, + bias_scales, bias_zero_points, &bias_quant, + kTransposeConvQuantizedDimension, false /* is_variable */, + typeToTfLiteType()); + SymmetricPerChannelQuantize(comp_info->bias_data, comp_info->bias_value_table, + ElementCount(*bias_dims), bias_scales->size, + bias_scales->data); + + int output_shape_dims_data[] = {1, 0}; + int32_t* output_shape = nullptr; + TfLiteIntArray* output_shape_dims = IntArrayFromInts(output_shape_dims_data); + + constexpr int tensors_size = kTransposeConvMaxTensors; + TfLiteTensor tensors[tensors_size] = { + CreateTensor(output_shape, output_shape_dims), + filter_tensor, + CreateQuantizedTensor(input_data, input_quantized, input_dims, + input_scale, input_zero_point), + bias_tensor, + CreateQuantizedTensor(output_quantized, output_dims, output_scale, + output_zero_point), + }; + + const int output_dims_count = ElementCount(*output_dims); + Quantize(expected_output_data, expected_output_quantized, output_dims_count, + output_scale, output_zero_point); + return ValidateTransposeConvGoldens( + tensors, tensors_size, expected_output_quantized, output_dims_count, + conv_params, output_quantized, tolerance, comp_info); +} + +#endif // USE_TFLM_COMPRESSION + } // namespace } // namespace testing } // namespace tflite @@ -240,6 +457,36 @@ TF_LITE_MICRO_TEST(SimpleTestFloat) { &tflite::testing::common_conv_params, output_data)); } +#ifdef USE_TFLM_COMPRESSION + +TF_LITE_MICRO_TEST(SimpleTestFloatCompressed) { + tflite::testing::TestCompressionInfo comp_info = {}; + comp_info.scheme = tflite::CompressionScheme::kBinQuant; + comp_info.filter_value_table = tflite::testing::kFilterData; + comp_info.filter_value_table_stride = + std::extent::value; + comp_info.filter_bit_width = tflite::testing::kBinQuantFilterBitWidth; + comp_info.bias_value_table = tflite::testing::kBiasData; + comp_info.bias_value_table_stride = + std::extent::value; + comp_info.bias_bit_width = tflite::testing::kBinQuantBiasBitWidth; + + float output_data[tflite::testing::kOutputElements]; + + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, + tflite::testing::TestTransposeConvFloat( + tflite::testing::kInputShape, tflite::testing::kInputData, + tflite::testing::kFilterShape, + reinterpret_cast(tflite::testing::kBinQuantFilterData), + tflite::testing::kBiasShape, + 
reinterpret_cast(tflite::testing::kBinQuantBiasData), + tflite::testing::kOutputShape, tflite::testing::kGoldenData, + &tflite::testing::common_conv_params, output_data, &comp_info)); +} + +#endif // USE_TFLM_COMPRESSION + TF_LITE_MICRO_TEST(fusedRELUTest) { float output_data[tflite::testing::kOutputElements]; float golden_data[] = {29, 24, 0, 0, 99, 72, 0, 0, @@ -476,4 +723,199 @@ TF_LITE_MICRO_TEST(HybridModeIsError) { &tflite::testing::common_conv_params, output_data)); } +#ifdef USE_TFLM_COMPRESSION + +TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannelSingleChannelCompressed) { + // data from TfLite test: SimpleBiasTestQuantizedPerChannelSingleChannel + const float input_scale = 16.0f / 255.0f; + const float output_scale = 2.0f; + const int input_zero_point = -128; + const int output_zero_point = -128; + constexpr float filter_scales[] = { + tflite::testing::kFilterNumChannelsQ1, + 9.0f / 127.0f, + }; + constexpr int filter_zero_points[] = { + tflite::testing::kFilterNumChannelsQ1, + 0, + }; + // bias scales and zero points will be computed + float bias_scales[std::extent::value] = {}; + int bias_zero_points[std::extent::value] = {}; + + int8_t input_quantized[tflite::testing::kInputElementsQ1]; + int8_t filter_quantized[tflite::testing::kFilterElementsQ1]; + int32_t bias_quantized[tflite::testing::kBiasElementsQ1]; + int8_t golden_quantized[tflite::testing::kOutputElementsQ1]; + int8_t output_quantized[tflite::testing::kOutputElementsQ1]; + + tflite::testing::TestCompressionQuantizedInfo comp_info = {}; + comp_info.scheme = tflite::CompressionScheme::kBinQuant; + + comp_info.filter_value_table = filter_quantized; + comp_info.filter_value_table_stride = + std::extent::value / + tflite::testing::kFilterNumChannelsQ1; + comp_info.filter_bit_width = tflite::testing::kBinQuantFilterBitWidthQ1; + comp_info.filter_compressed = tflite::testing::kBinQuantFilterDataQ1; + comp_info.filter_data = tflite::testing::kFilterDataQ1; + comp_info.filter_dims_data = tflite::testing::kFilterShapeQ1; + comp_info.filter_scales = filter_scales; + comp_info.filter_zero_points = filter_zero_points; + + comp_info.bias_value_table = bias_quantized; + comp_info.bias_value_table_stride = + std::extent::value / + tflite::testing::kFilterNumChannelsQ1; + comp_info.bias_bit_width = tflite::testing::kBinQuantBiasBitWidthQ1; + comp_info.bias_compressed = tflite::testing::kBinQuantBiasDataQ1; + comp_info.bias_data = tflite::testing::kBiasDataQ1; + comp_info.bias_dims_data = tflite::testing::kBiasShapeQ1; + comp_info.bias_scales = bias_scales; + comp_info.bias_zero_points = bias_zero_points; + + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, + tflite::testing::TestTransposeConvQuantizedCompressed( + tflite::testing::kInputShapeQ1, tflite::testing::kInputDataQ1, + input_quantized, input_scale, input_zero_point, + tflite::testing::kOutputShapeQ1, tflite::testing::kGoldenDataQ1, + golden_quantized, output_quantized, output_scale, output_zero_point, + &tflite::testing::common_conv_params, 0, &comp_info)); +} + +TF_LITE_MICRO_TEST( + SimpleBiasTestQuantizedPerChannelBias16MultiChannelCompressed) { + // data from TfLite test: SimpleBiasTestQuantizedPerChannel16x8Bias64 + const float input_scale = 4.0f / 127.0f; + const float output_scale = 128.0f / 65536.0f; + const int input_zero_point = 0; + const int output_zero_point = 0; + constexpr float filter_scales[] = { + tflite::testing::kFilterNumChannelsQ2, + 7.0f / 127.0f, + 8.0f / 127.0f, + }; + constexpr int filter_zero_points[] = { + tflite::testing::kFilterNumChannelsQ2, + 0, + 0, + }; 
+ // bias scales and zero points will be computed + float bias_scales[std::extent::value] = {}; + int bias_zero_points[std::extent::value] = {}; + + int16_t input_quantized[tflite::testing::kInputElementsQ2]; + int8_t filter_quantized[tflite::testing::kFilterElementsQ2]; + int16_t bias_quantized[tflite::testing::kBiasElementsQ2]; + int16_t golden_quantized[tflite::testing::kOutputElementsQ2]; + int16_t output_quantized[tflite::testing::kOutputElementsQ2]; + + tflite::testing::TestCompressionQuantizedInfo comp_info = {}; + comp_info.scheme = tflite::CompressionScheme::kBinQuant; + + comp_info.filter_value_table = filter_quantized; + comp_info.filter_value_table_stride = + std::extent< + decltype(tflite::testing::kBinQuantFilterValueTableQ2)>::value / + tflite::testing::kFilterNumChannelsQ2; + comp_info.filter_bit_width = tflite::testing::kBinQuantFilterBitWidthQ2; + comp_info.filter_compressed = tflite::testing::kBinQuantFilterDataQ2; + comp_info.filter_data = tflite::testing::kBinQuantFilterValueTableQ2; + comp_info.filter_dims_data = tflite::testing::kFilterShapeQ2; + comp_info.filter_scales = filter_scales; + comp_info.filter_zero_points = filter_zero_points; + + comp_info.bias_value_table = bias_quantized; + comp_info.bias_value_table_stride = + std::extent::value / + tflite::testing::kFilterNumChannelsQ2; + comp_info.bias_bit_width = tflite::testing::kBinQuantBiasBitWidthQ2; + comp_info.bias_compressed = tflite::testing::kBinQuantBiasDataQ2; + comp_info.bias_data = tflite::testing::kBiasDataQ2; + comp_info.bias_dims_data = tflite::testing::kBiasShapeQ2; + comp_info.bias_scales = bias_scales; + comp_info.bias_zero_points = bias_zero_points; + + // The quantized output is compared to the expected output (quantized). + // A tolerance of 81 is approx. 0.1582f which is less than the TfLite + // tolerance of 0.19f. 
+ TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, + tflite::testing::TestTransposeConvQuantizedCompressed( + tflite::testing::kInputShapeQ2, tflite::testing::kInputDataQ2, + input_quantized, input_scale, input_zero_point, + tflite::testing::kOutputShapeQ2, tflite::testing::kGoldenDataQ2, + golden_quantized, output_quantized, output_scale, output_zero_point, + &tflite::testing::common_conv_params, 81, &comp_info)); +} + +TF_LITE_MICRO_TEST( + SimpleBiasTestQuantizedPerChannelBias64MultiChannelCompressed) { + // data from TfLite test: SimpleBiasTestQuantizedPerChannel16x8Bias64 + const float input_scale = 4.0f / 127.0f; + const float output_scale = 128.0f / 65536.0f; + const int input_zero_point = 0; + const int output_zero_point = 0; + constexpr float filter_scales[] = { + tflite::testing::kFilterNumChannelsQ2, + 7.0f / 127.0f, + 8.0f / 127.0f, + }; + constexpr int filter_zero_points[] = { + tflite::testing::kFilterNumChannelsQ2, + 0, + 0, + }; + // bias scales and zero points will be computed + float bias_scales[std::extent::value] = {}; + int bias_zero_points[std::extent::value] = {}; + + int16_t input_quantized[tflite::testing::kInputElementsQ2]; + int8_t filter_quantized[tflite::testing::kFilterElementsQ2]; + int64_t bias_quantized[tflite::testing::kBiasElementsQ2]; + int16_t golden_quantized[tflite::testing::kOutputElementsQ2]; + int16_t output_quantized[tflite::testing::kOutputElementsQ2]; + + tflite::testing::TestCompressionQuantizedInfo comp_info = {}; + comp_info.scheme = tflite::CompressionScheme::kBinQuant; + + comp_info.filter_value_table = filter_quantized; + comp_info.filter_value_table_stride = + std::extent< + decltype(tflite::testing::kBinQuantFilterValueTableQ2)>::value / + tflite::testing::kFilterNumChannelsQ2; + comp_info.filter_bit_width = tflite::testing::kBinQuantFilterBitWidthQ2; + comp_info.filter_compressed = tflite::testing::kBinQuantFilterDataQ2; + comp_info.filter_data = tflite::testing::kBinQuantFilterValueTableQ2; + comp_info.filter_dims_data = tflite::testing::kFilterShapeQ2; + comp_info.filter_scales = filter_scales; + comp_info.filter_zero_points = filter_zero_points; + + comp_info.bias_value_table = bias_quantized; + comp_info.bias_value_table_stride = + std::extent::value / + tflite::testing::kFilterNumChannelsQ2; + comp_info.bias_bit_width = tflite::testing::kBinQuantBiasBitWidthQ2; + comp_info.bias_compressed = tflite::testing::kBinQuantBiasDataQ2; + comp_info.bias_data = tflite::testing::kBiasDataQ2; + comp_info.bias_dims_data = tflite::testing::kBiasShapeQ2; + comp_info.bias_scales = bias_scales; + comp_info.bias_zero_points = bias_zero_points; + + // The quantized output is compared to the expected output (quantized). + // A tolerance of 81 is approx. 0.1582f which is less than the TfLite + // tolerance of 0.19f. + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, + tflite::testing::TestTransposeConvQuantizedCompressed( + tflite::testing::kInputShapeQ2, tflite::testing::kInputDataQ2, + input_quantized, input_scale, input_zero_point, + tflite::testing::kOutputShapeQ2, tflite::testing::kGoldenDataQ2, + golden_quantized, output_quantized, output_scale, output_zero_point, + &tflite::testing::common_conv_params, 81, &comp_info)); +} + +#endif // USE_TFLM_COMPRESSION + TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/micro_allocator.cc b/tensorflow/lite/micro/micro_allocator.cc index 930da754bb5..c83a009b4b6 100644 --- a/tensorflow/lite/micro/micro_allocator.cc +++ b/tensorflow/lite/micro/micro_allocator.cc @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. 
All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -36,6 +36,15 @@ limitations under the License. #include "tensorflow/lite/micro/tflite_bridge/flatbuffer_conversions_bridge.h" #include "tensorflow/lite/schema/schema_generated.h" +#ifdef USE_TFLM_COMPRESSION + +#include +#include + +#include "tensorflow/lite/micro/compression/metadata_generated.h" + +#endif // USE_TFLM_COMPRESSION + namespace tflite { namespace { @@ -355,6 +364,142 @@ TfLiteStatus InitializeTfLiteEvalTensorFromFlatbuffer( return kTfLiteOk; } +#ifdef USE_TFLM_COMPRESSION + +const tflite::micro::compression::Metadata* GetCompressionMetadata( + const Model& model) { + const auto metadata_vector = model.metadata(); + if (metadata_vector == nullptr) { + return nullptr; + } + auto buffers = model.buffers(); + if (buffers == nullptr) { + return nullptr; + } + const size_t metadata_string_length = std::strlen(kCompressionMetadataString); + for (size_t metadata_index = 0; metadata_index < metadata_vector->size(); + metadata_index++) { + auto metadata = metadata_vector->Get(metadata_index); + if (metadata->name() == nullptr || metadata->name()->size() == 0) { + continue; + } + const char* s = metadata->name()->c_str(); + if ((metadata->name()->size() == metadata_string_length) && + (std::strncmp(s, kCompressionMetadataString, metadata_string_length) == + 0)) { + auto buffer_index = metadata->buffer(); + if (buffer_index == 0 || buffer_index >= buffers->size()) { + MicroPrintf("Compression: Invalid buffer index %u", buffer_index); + continue; + } + auto vp = buffers->Get(buffer_index)->data(); + if (vp == nullptr || vp->data() == nullptr) { + MicroPrintf("Compression: Invalid data for buffer index %u", + buffer_index); + continue; + } + // TODO(ddavis-2015): support multiple compression methods, possibly + // through multiple verification checks. + // Then return a pair. 
+ auto compression_metadata = + tflite::micro::compression::GetSizePrefixedMetadata(vp); + flatbuffers::Verifier verifier(vp->data(), vp->size(), + flatbuffers::Verifier::Options()); + if (!tflite::micro::compression::VerifyMetadataBuffer(verifier)) { + MicroPrintf("Compression: verification failure"); + return nullptr; + } else { + return compression_metadata; + } + } + } + + return nullptr; +} + +TfLiteStatus InitializeCompressionTensorDataFromFlatbuffer( + const Model& model, const size_t subgraph_index, + const tflite::micro::compression::LutTensor& lut_tensor, + CompressionTensorData* ctd) { + // TODO(ddavis-2015): support multiple compression schemes + ctd->scheme = CompressionScheme::kBinQuant; + + const size_t tensor_index = lut_tensor.tensor(); + auto tensors = model.subgraphs()->Get(subgraph_index)->tensors(); + if (tensor_index >= tensors->size()) { + MicroPrintf("Compression: invalid tensor index %u in LutTensor", + tensor_index); + return kTfLiteError; + } + const size_t index_bit_width = lut_tensor.index_bitwidth(); + if (index_bit_width > LookupTableData::kMaxBitWidth) { + MicroPrintf("Compression: invalid bit width %u in LutTensor", + index_bit_width); + return kTfLiteError; + } + ctd->data.lut_data->compressed_bit_width = index_bit_width; + const size_t value_buffer_index = lut_tensor.value_buffer(); + if (value_buffer_index >= model.buffers()->size()) { + MicroPrintf("Compression: invalid value_buffer %u in LutTensor", + value_buffer_index); + return kTfLiteError; + } + auto value_buffer = model.buffers()->Get(value_buffer_index)->data(); + if (value_buffer == nullptr || value_buffer->data() == nullptr) { + MicroPrintf("Compression: invalid value table for value_buffer %u", + value_buffer_index); + return kTfLiteError; + } + ctd->data.lut_data->value_table = value_buffer->data(); + auto tensor = + model.subgraphs()->Get(subgraph_index)->tensors()->Get(tensor_index); + if (tensor->shape() == nullptr) { + MicroPrintf("Compression: scalar tensors not supported"); + return kTfLiteError; + } + TfLiteType tensor_type = kTfLiteNoType; + TfLiteStatus status = ConvertTensorType(tensor->type(), &tensor_type); + if (status != kTfLiteOk) { + MicroPrintf("Compression: failed to convert tensor type"); + return kTfLiteError; + } + size_t tensor_type_size = 0; + status = TfLiteTypeSizeOf(tensor_type, &tensor_type_size); + if (status != kTfLiteOk) { + MicroPrintf("Compression: failed to get tensor type size"); + return kTfLiteError; + } + if (tensor->quantization() != nullptr && + tensor->quantization()->scale() != nullptr && + tensor->quantization()->scale()->size() > 1) { + const size_t num_channels = tensor->quantization()->scale()->size(); + ctd->data.lut_data->is_per_channel_quantized = true; + const TfLiteIntArray* dims = + FlatBufferVectorToTfLiteTypeArray(tensor->shape()); + int32_t quantized_axis = tensor->quantization()->quantized_dimension(); + if (quantized_axis == 0) { + ctd->data.lut_data->use_alternate_axis = false; + } else if (quantized_axis == (dims->size - 1)) { + ctd->data.lut_data->use_alternate_axis = true; + } else { + MicroPrintf("Compression: unsupported quantization axis %u", + quantized_axis); + return kTfLiteError; + } + ctd->data.lut_data->value_table_channel_stride = + (value_buffer->size() / tensor_type_size) / num_channels; + } else { + ctd->data.lut_data->is_per_channel_quantized = false; + ctd->data.lut_data->use_alternate_axis = false; + ctd->data.lut_data->value_table_channel_stride = + value_buffer->size() / tensor_type_size; + } + + return kTfLiteOk; 
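+  // Worked example of the stride computation above (hypothetical numbers):
+  // a 32-byte value_buffer of int8 values split across 4 channels gives
+  // value_table_channel_stride = (32 / 1) / 4 = 8 values per channel.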
+} + +#endif // USE_TFLM_COMPRESSION + } // namespace internal size_t MicroAllocator::GetDefaultTailUsage(bool is_memory_planner_given) { @@ -502,7 +647,11 @@ SubgraphAllocations* MicroAllocator::StartModelAllocation(const Model* model) { return nullptr; } - if (AllocateTfLiteEvalTensors(model, output) != kTfLiteOk || + if ( +#ifdef USE_TFLM_COMPRESSION + AllocateCompressedTensorsList(model, output) != kTfLiteOk || +#endif // USE_TFLM_COMPRESSION + AllocateTfLiteEvalTensors(model, output) != kTfLiteOk || AllocateNodeAndRegistrations(model, output) != kTfLiteOk) { return nullptr; } @@ -757,6 +906,121 @@ bool MicroAllocator::IsAllTempDeallocated() { return non_persistent_buffer_allocator_->IsAllTempDeallocated(); } +#ifdef USE_TFLM_COMPRESSION + +TfLiteStatus MicroAllocator::AllocateCompressedTensorsList( + const Model* model, SubgraphAllocations* subgraph_allocations) { + TFLITE_DCHECK(subgraph_allocations != nullptr); + + for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size(); + subgraph_idx++) { + subgraph_allocations[subgraph_idx].compressed.tensors = nullptr; + } + + const tflite::micro::compression::Metadata* compression_metadata = + internal::GetCompressionMetadata(*model); + if (compression_metadata == nullptr) { + // no compression metadata is available + return kTfLiteOk; + } + if (compression_metadata->subgraphs() == nullptr) { + MicroPrintf("Compression: invalid Subgraph vector"); + return kTfLiteError; + } + if (compression_metadata->subgraphs()->size() == 0) { + MicroPrintf("Compression: zero length Subgraph vector"); + return kTfLiteError; + } + + for (size_t subgraph_index = 0; + subgraph_index < compression_metadata->subgraphs()->size(); + subgraph_index++) { + auto subgraph = compression_metadata->subgraphs()->Get(subgraph_index); + + if (subgraph->lut_tensors() == nullptr) { + MicroPrintf("Compression: invalid LutTensor vector"); + return kTfLiteError; + } + if (subgraph->lut_tensors()->size() == 0) { + MicroPrintf("Compression: zero length LutTensor vector"); + return kTfLiteError; + } + + for (size_t lut_tensors_index = 0; + lut_tensors_index < subgraph->lut_tensors()->size(); + lut_tensors_index++) { + auto lut_tensor = subgraph->lut_tensors()->Get(lut_tensors_index); + + CompressionTensorData* ctd = reinterpret_cast( + persistent_buffer_allocator_->AllocatePersistentBuffer( + sizeof(CompressionTensorData), alignof(CompressionTensorData))); + if (ctd == nullptr) { + MicroPrintf( + "Compressions: failed to allocate memory for " + "CompressionTensorData, %d bytes required", + sizeof(CompressionTensorData)); + return kTfLiteError; + } + + LookupTableData* lut_table = reinterpret_cast( + persistent_buffer_allocator_->AllocatePersistentBuffer( + sizeof(LookupTableData), alignof(LookupTableData))); + if (lut_table == nullptr) { + MicroPrintf( + "Compressions: failed to allocate memory for LookupTableData, " + "%d bytes required", + sizeof(LookupTableData)); + return kTfLiteError; + } + ctd->data.lut_data = lut_table; + + TfLiteStatus status = + internal::InitializeCompressionTensorDataFromFlatbuffer( + *model, subgraph_index, *lut_tensor, ctd); + if (status != kTfLiteOk) { + MicroPrintf("Compression: failed to initialize data for LutTensor %u", + lut_tensors_index); + return kTfLiteError; + } + + if (subgraph_allocations[subgraph_index].compressed.tensors == nullptr) { + size_t alloc_count = + model->subgraphs()->Get(subgraph_index)->tensors()->size(); + const CompressionTensorData** tensors = + reinterpret_cast( + 
persistent_buffer_allocator_->AllocatePersistentBuffer( + sizeof(CompressionTensorData*) * alloc_count, + alignof(CompressionTensorData*))); + if (tensors == nullptr) { + MicroPrintf( + "Compression: failed to allocate memory for compression tensor " + "list, %d bytes required", + sizeof(CompressionTensorData*) * alloc_count); + return kTfLiteError; + } + + subgraph_allocations[subgraph_index].compressed.tensors = tensors; + std::fill(tensors, tensors + alloc_count, nullptr); + } + + const size_t tensor_index = lut_tensor->tensor(); + if (subgraph_allocations[subgraph_index] + .compressed.tensors[tensor_index] != nullptr) { + MicroPrintf("Compression: duplicate LutTensor subgraph %u tensor %u", + subgraph_index, tensor_index); + return kTfLiteError; + } else { + subgraph_allocations[subgraph_index].compressed.tensors[tensor_index] = + ctd; + } + } + } + + return kTfLiteOk; +} + +#endif // USE_TFLM_COMPRESSION + TfLiteStatus MicroAllocator::AllocateTfLiteEvalTensors( const Model* model, SubgraphAllocations* subgraph_allocations) { TFLITE_DCHECK(subgraph_allocations != nullptr); diff --git a/tensorflow/lite/micro/micro_allocator.h b/tensorflow/lite/micro/micro_allocator.h index 02317220e12..215bffc6a8c 100644 --- a/tensorflow/lite/micro/micro_allocator.h +++ b/tensorflow/lite/micro/micro_allocator.h @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -26,6 +26,12 @@ limitations under the License. #include "tensorflow/lite/micro/tflite_bridge/flatbuffer_conversions_bridge.h" #include "tensorflow/lite/schema/schema_generated.h" +#ifdef USE_TFLM_COMPRESSION + +#include "tensorflow/lite/micro/compression.h" + +#endif // USE_TFLM_COMPRESSION + namespace tflite { // TODO(b/199402574): rename to tflite_internal or just remove internal @@ -91,6 +97,9 @@ struct ScratchBufferHandle { struct SubgraphAllocations { NodeAndRegistration* node_and_registrations; TfLiteEvalTensor* tensors; +#ifdef USE_TFLM_COMPRESSION + CompressedTensorList compressed; +#endif // USE_TFLM_COMPRESSION }; // Allocator responsible for allocating memory for all intermediate tensors @@ -258,6 +267,15 @@ class MicroAllocator { MicroMemoryPlanner* memory_planner); virtual ~MicroAllocator(); +#ifdef USE_TFLM_COMPRESSION + + // Allocates an array in the arena of pointers to the compressions data + // required to decompress tensors for each subgraph within the model. + virtual TfLiteStatus AllocateCompressedTensorsList( + const Model* model, SubgraphAllocations* subgraph_allocations); + +#endif // USE_TFLM_COMPRESSION + // Allocates an array in the arena to hold pointers to the node and // registration pointers required to represent the inference graph of the // model. diff --git a/tensorflow/lite/micro/micro_context.cc b/tensorflow/lite/micro/micro_context.cc index 295b3c34463..c21caac0e89 100644 --- a/tensorflow/lite/micro/micro_context.cc +++ b/tensorflow/lite/micro/micro_context.cc @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -18,8 +18,10 @@ limitations under the License. 
#include #include +#include "tensorflow/lite/kernels/internal/compatibility.h" #include "tensorflow/lite/micro/micro_common.h" #include "tensorflow/lite/micro/micro_log.h" +#include "tensorflow/lite/micro/micro_utils.h" namespace tflite { namespace { @@ -34,6 +36,103 @@ int GetTensorIndex(int index, int max_size, const int* tensor_indices) { return -1; } +#ifdef USE_TFLM_COMPRESSION + +struct DecompressionState { + DecompressionState() = delete; + + DecompressionState(const uint8_t* compressed_indices, + const size_t count_indices, + const CompressionTensorData& comp_data, + const size_t num_channels) + : compressed_indices_(compressed_indices), + count_indices_(count_indices), + comp_data_(comp_data), + num_channels_(num_channels) {} + + template + T* DecompressToBuffer(void* buffer); + + size_t GetNextTableIndex(); + void UpdateBufferAndChannelIndex(); + + private: + const uint8_t* compressed_indices_; + const size_t count_indices_; + const CompressionTensorData& comp_data_; + const size_t num_channels_; + const size_t compressed_bit_width_ = + comp_data_.data.lut_data->compressed_bit_width; + size_t channel_ = 0; + size_t index_in_channel_ = 0; + const size_t elements_per_channel_ = + comp_data_.data.lut_data->use_alternate_axis + ? 1 + : count_indices_ / num_channels_; + size_t buffer_index_ = 0; + size_t current_offset_ = 0; + size_t current_bits_remaining_ = 8; + uint8_t current_byte_ = compressed_indices_[0]; +}; + +template +T* DecompressionState::DecompressToBuffer(void* buffer) { + while (buffer_index_ < count_indices_) { + const size_t table_index = GetNextTableIndex(); + static_cast(buffer)[buffer_index_] = + static_cast(comp_data_.data.lut_data->value_table) + [table_index + + (channel_ * comp_data_.data.lut_data->value_table_channel_stride)]; + UpdateBufferAndChannelIndex(); + } + + return static_cast(buffer); +} + +size_t DecompressionState::GetNextTableIndex() { + TFLITE_DCHECK(compressed_bit_width_ <= LookupTableData::kMaxBitWidth); + TFLITE_DCHECK(compressed_bit_width_ > 0); + + size_t table_index_bits_to_fill = compressed_bit_width_; + size_t table_index = 0; + + while (table_index_bits_to_fill > 0) { + if (current_bits_remaining_ == 0) { + current_offset_++; + current_byte_ = compressed_indices_[current_offset_]; + current_bits_remaining_ = 8; + } + + const uint8_t mask_bit_count = + std::min(table_index_bits_to_fill, + std::min(compressed_bit_width_, current_bits_remaining_)); + const uint8_t current_byte_mask = (1 << mask_bit_count) - 1; + table_index <<= mask_bit_count; + table_index |= + (current_byte_ >> (current_bits_remaining_ - mask_bit_count)) & + current_byte_mask; + + table_index_bits_to_fill -= mask_bit_count; + current_bits_remaining_ -= mask_bit_count; + } + + return table_index; +} + +void DecompressionState::UpdateBufferAndChannelIndex() { + buffer_index_++; + index_in_channel_++; + if (index_in_channel_ == elements_per_channel_) { + index_in_channel_ = 0; + channel_++; + if (channel_ == num_channels_) { + channel_ = 0; + } + } +} + +#endif // USE_TFLM_COMPRESSION + } // namespace TfLiteTensor* MicroContext::AllocateTempInputTensor(const TfLiteNode* node, @@ -74,4 +173,57 @@ void MicroContextReportOpError(struct TfLiteContext* context, va_end(args); } +#ifdef USE_TFLM_COMPRESSION + +void* MicroContext::DecompressTensorToScratchBuffer( + const TfLiteEvalTensor& tensor, + const CompressionTensorData& compression_data, int scratch_buffer_handle) { + TFLITE_DCHECK(compression_data.scheme == CompressionScheme::kBinQuant); + TFLITE_DCHECK(scratch_buffer_handle 
!= -1); + uint8_t* scratch_buffer = + static_cast(GetScratchBuffer(scratch_buffer_handle)); + TFLITE_DCHECK(scratch_buffer != nullptr); + size_t count = ElementCount(*tensor.dims); + size_t num_channels = 1; + + if (compression_data.data.lut_data->is_per_channel_quantized) { + const size_t channel_axis = + compression_data.data.lut_data->use_alternate_axis + ? tensor.dims->size - 1 + : 0; + num_channels = tensor.dims->data[channel_axis]; + } + + DecompressionState ds(static_cast(tensor.data.data), count, + compression_data, num_channels); + + switch (tensor.type) { + case kTfLiteBool: { + return ds.DecompressToBuffer(scratch_buffer); + } break; + case kTfLiteInt8: { + return ds.DecompressToBuffer(scratch_buffer); + } break; + case kTfLiteInt16: { + return ds.DecompressToBuffer(scratch_buffer); + } break; + case kTfLiteInt32: { + return ds.DecompressToBuffer(scratch_buffer); + } break; + case kTfLiteInt64: { + return ds.DecompressToBuffer(scratch_buffer); + } break; + case kTfLiteFloat32: { + return ds.DecompressToBuffer(scratch_buffer); + } break; + default: { + MicroPrintf("Unsupported decompression tensor type %d", tensor.type); + } break; + } + + return nullptr; +} + +#endif // USE_TFLM_COMPRESSION + } // namespace tflite diff --git a/tensorflow/lite/micro/micro_interpreter_context.cc b/tensorflow/lite/micro/micro_interpreter_context.cc index 098df15d522..0ba461fd7b6 100644 --- a/tensorflow/lite/micro/micro_interpreter_context.cc +++ b/tensorflow/lite/micro/micro_interpreter_context.cc @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -18,8 +18,28 @@ limitations under the License. #include #include "tensorflow/lite/kernels/internal/compatibility.h" +#include "tensorflow/lite/micro/micro_utils.h" namespace tflite { + +namespace { + +#ifdef USE_TFLM_COMPRESSION + +int GetInputTensorIndex(const TfLiteNode* node, const int index) { + if (index >= 0 && index < node->inputs->size) { + const int tensor_index = node->inputs->data[index]; + if (tensor_index != kTfLiteOptionalTensor) { + return tensor_index; + } + } + return -1; +} + +#endif // USE_TFLM_COMPRESSION + +} // namespace + MicroInterpreterContext::MicroInterpreterContext(MicroAllocator* allocator, const Model* model, MicroInterpreterGraph* graph) @@ -106,4 +126,83 @@ MicroInterpreterContext::GetInterpreterState() const { return state_; } +#ifdef USE_TFLM_COMPRESSION + +// Available during Prepare & Eval. Returns false if tensor is not +// compressed. +bool MicroInterpreterContext::IsTensorCompressed(const TfLiteNode* node, + int tensor_idx) { + TFLITE_DCHECK(state_ == InterpreterState::kPrepare || + state_ == InterpreterState::kInvoke); + + const SubgraphAllocations* allocations = + &graph_.GetAllocations()[graph_.GetCurrentSubgraphIndex()]; + if (allocations->compressed.tensors == nullptr) { + return false; + } + int index = GetInputTensorIndex(node, tensor_idx); + if (index == -1) { + return false; + } + return allocations->compressed.tensors[index] != nullptr; +} + +// Only available during Prepare. The kernel is responsible for storing the +// scratch buffer handle. 
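+//
+// A minimal usage sketch from a kernel's Prepare (OpData and the weights
+// input index 1 are illustrative assumptions, not part of this API):
+//
+//   auto* op_data = static_cast<OpData*>(node->user_data);
+//   op_data->weights_scratch_index =
+//       micro_context->AllocateDecompressionScratchBuffer(node, /*tensor_idx=*/1);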
+int MicroInterpreterContext::AllocateDecompressionScratchBuffer( + const TfLiteNode* node, int tensor_idx) { + TFLITE_DCHECK(state_ == InterpreterState::kPrepare); + + const SubgraphAllocations* allocations = + &graph_.GetAllocations()[graph_.GetCurrentSubgraphIndex()]; + if (allocations->compressed.tensors == nullptr) { + return -1; + } + int index = GetInputTensorIndex(node, tensor_idx); + if (index == -1 || allocations->compressed.tensors[index] == nullptr) { + return -1; + } + const TfLiteEvalTensor* tensor = &allocations->tensors[index]; + const size_t byte_count = EvalTensorBytes(tensor); + int scratch_index = -1; + TfLiteStatus result = RequestScratchBufferInArena(byte_count, &scratch_index); + if (result != kTfLiteOk) { + return -1; + } + + return scratch_index; +} + +// Available during Prepare & Eval. Returns nullptr if tensor is not +// compressed. +const CompressionTensorData* MicroInterpreterContext::GetTensorCompressionData( + const TfLiteNode* node, int tensor_idx) { + TFLITE_DCHECK(state_ == InterpreterState::kPrepare || + state_ == InterpreterState::kInvoke); + + const SubgraphAllocations* allocations = + &graph_.GetAllocations()[graph_.GetCurrentSubgraphIndex()]; + if (allocations->compressed.tensors == nullptr) { + return nullptr; + } + int index = GetInputTensorIndex(node, tensor_idx); + if (index == -1) { + return nullptr; + } + return allocations->compressed.tensors[index]; +} + +// Only available during Eval. Returns nullptr on failure, otherwise returns a +// pointer to the scratch buffer. +void* MicroInterpreterContext::DecompressTensorToScratchBuffer( + const TfLiteEvalTensor& tensor, + const CompressionTensorData& compression_data, int scratch_buffer_handle) { + TFLITE_DCHECK(state_ == InterpreterState::kInvoke); + + return MicroContext::DecompressTensorToScratchBuffer(tensor, compression_data, + scratch_buffer_handle); +} + +#endif // USE_TFLM_COMPRESSION + } // namespace tflite diff --git a/tensorflow/lite/micro/micro_interpreter_test.cc b/tensorflow/lite/micro/micro_interpreter_test.cc index e44de6b09aa..873ea96ac1e 100644 --- a/tensorflow/lite/micro/micro_interpreter_test.cc +++ b/tensorflow/lite/micro/micro_interpreter_test.cc @@ -1,4 +1,4 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/lite/micro/micro_interpreter.h" #include +#include #include "tensorflow/lite/micro/arena_allocator/recording_single_arena_buffer_allocator.h" #include "tensorflow/lite/micro/compatibility.h" @@ -108,6 +109,58 @@ TF_LITE_MICRO_TEST(TestInterpreter) { TF_LITE_MICRO_EXPECT_EQ(tflite::testing::MockCustom::freed_, true); } +#ifdef USE_TFLM_COMPRESSION + +TF_LITE_MICRO_TEST(TestInterpreterCompression) { + const tflite::Model* model = tflite::testing::GetSimpleMockModelCompressed(); + TF_LITE_MICRO_EXPECT(nullptr != model); + tflite::testing::TestingOpResolver op_resolver; + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, + tflite::testing::GetTestingOpResolver(op_resolver)); + + constexpr size_t allocator_buffer_size = 2000; + uint8_t allocator_buffer[allocator_buffer_size]; + + // Create a new scope so that we can test the destructor. 
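+  // The golden values checked below follow from the mock model: the input
+  // scalar 42 is broadcast-added to the decompressed weights
+  // {1, 2, 3, 4, 5, -1, -2, -3, -4, -5, 1, 2, 3, 4, 5}.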
+ { + tflite::MicroInterpreter interpreter(model, op_resolver, allocator_buffer, + allocator_buffer_size); + TF_LITE_MICRO_EXPECT_EQ(interpreter.AllocateTensors(), kTfLiteOk); + TF_LITE_MICRO_EXPECT_EQ(static_cast(1), interpreter.inputs_size()); + TF_LITE_MICRO_EXPECT_EQ(static_cast(1), interpreter.outputs_size()); + + TfLiteTensor* input = interpreter.input(0); + TF_LITE_MICRO_EXPECT(nullptr != input); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt16, input->type); + TF_LITE_MICRO_EXPECT_EQ(1, input->dims->size); + TF_LITE_MICRO_EXPECT_EQ(1, input->dims->data[0]); + TF_LITE_MICRO_EXPECT_EQ(static_cast(2), input->bytes); + TF_LITE_MICRO_EXPECT(nullptr != input->data.data); + static_cast(input->data.data)[0] = 42; + + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, interpreter.Invoke()); + + const std::initializer_list kGolden = { + 43, 44, 45, 46, 47, 41, 40, 39, 38, 37, 43, 44, 45, 46, 47}; + const int kGoldenCount = kGolden.size(); + TfLiteTensor* output = interpreter.output(0); + TF_LITE_MICRO_EXPECT(nullptr != output); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt16, output->type); + TF_LITE_MICRO_EXPECT_EQ(1, output->dims->size); + TF_LITE_MICRO_EXPECT_EQ(kGoldenCount, output->dims->data[0]); + TF_LITE_MICRO_EXPECT_EQ( + static_cast(kGoldenCount * sizeof(*kGolden.begin())), + output->bytes); + TF_LITE_MICRO_EXPECT(nullptr != output->data.data); + for (int i = 0; i < kGoldenCount; i++) { + TF_LITE_MICRO_EXPECT_EQ(static_cast(output->data.data)[i], + kGolden.begin()[i]); + } + } +} + +#endif // USE_TFLM_COMPRESSION + TF_LITE_MICRO_TEST(TestMultiTenantInterpreter) { tflite::testing::TestingOpResolver op_resolver; TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, diff --git a/tensorflow/lite/micro/recording_micro_allocator.cc b/tensorflow/lite/micro/recording_micro_allocator.cc index ee76196d255..18addaee5f7 100644 --- a/tensorflow/lite/micro/recording_micro_allocator.cc +++ b/tensorflow/lite/micro/recording_micro_allocator.cc @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -78,14 +78,15 @@ RecordedAllocation RecordingMicroAllocator::GetRecordedAllocation( return recorded_node_and_registration_array_data_; case RecordedAllocationType::kOpData: return recorded_op_data_; - // the function MicroPrintf was never reached outside the switch, because - // each case has a return. 
As the intention of the MicroPrintf is to be - // called when no matching case is found, a default case was added to - // contemplate an invalid allocation type +#ifdef USE_TFLM_COMPRESSION + case RecordedAllocationType::kCompressionData: + return recorded_compression_data_; +#endif // USE_TFLM_COMPRESSION default: - MicroPrintf("Invalid allocation type supplied: %d", allocation_type); - return RecordedAllocation(); + break; } + MicroPrintf("Invalid allocation type supplied: %d", allocation_type); + return RecordedAllocation(); } const RecordingSingleArenaBufferAllocator* @@ -117,6 +118,13 @@ void RecordingMicroAllocator::PrintAllocations() const { "NodeAndRegistration structs"); PrintRecordedAllocation(RecordedAllocationType::kOpData, "Operator runtime data", "OpData structs"); + +#ifdef USE_TFLM_COMPRESSION + + PrintRecordedAllocation(RecordedAllocationType::kCompressionData, + "Persistent compression data", "allocations"); + +#endif // USE_TFLM_COMPRESSION } void* RecordingMicroAllocator::AllocatePersistentBuffer(size_t bytes) { @@ -233,6 +241,21 @@ TfLiteStatus RecordingMicroAllocator::PopulateTfLiteTensorFromFlatbuffer( return status; } +#ifdef USE_TFLM_COMPRESSION + +TfLiteStatus RecordingMicroAllocator::AllocateCompressedTensorsList( + const Model* model, SubgraphAllocations* subgraph_allocations) { + RecordedAllocation allocations = SnapshotAllocationUsage(); + + TfLiteStatus status = MicroAllocator::AllocateCompressedTensorsList( + model, subgraph_allocations); + + RecordAllocationUsage(allocations, recorded_compression_data_); + return status; +} + +#endif // USE_TFLM_COMPRESSION + RecordedAllocation RecordingMicroAllocator::SnapshotAllocationUsage() const { return {/*requested_bytes=*/recording_memory_allocator_->GetRequestedBytes(), /*used_bytes=*/recording_memory_allocator_->GetUsedBytes(), diff --git a/tensorflow/lite/micro/recording_micro_allocator.h b/tensorflow/lite/micro/recording_micro_allocator.h index b6f69264dc0..80f163240d3 100644 --- a/tensorflow/lite/micro/recording_micro_allocator.h +++ b/tensorflow/lite/micro/recording_micro_allocator.h @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
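The enum value added below lets callers of the recording allocator inspect the compression footprint like any other allocation category. A minimal sketch (the recording_allocator variable is illustrative):

    tflite::RecordedAllocation allocation =
        recording_allocator->GetRecordedAllocation(
            tflite::RecordedAllocationType::kCompressionData);
    MicroPrintf("compression data: %d allocations, %d bytes used",
                allocation.count, allocation.used_bytes);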
@@ -33,6 +33,11 @@ enum class RecordedAllocationType { kTfLiteTensorVariableBufferData, kNodeAndRegistrationArray, kOpData, +#ifdef USE_TFLM_COMPRESSION + kCompressionData, +#endif // USE_TFLM_COMPRESSION + + kNumAllocationTypes, // must be last }; // Container for holding information about allocation recordings by a given @@ -93,6 +98,13 @@ class RecordingMicroAllocator : public MicroAllocator { int subgraph_index, bool allocate_temp) override; +#ifdef USE_TFLM_COMPRESSION + + TfLiteStatus AllocateCompressedTensorsList( + const Model* model, SubgraphAllocations* subgraph_allocations) override; + +#endif // USE_TFLM_COMPRESSION + private: RecordingMicroAllocator(RecordingSingleArenaBufferAllocator* memory_allocator, MicroMemoryPlanner* memory_planner); @@ -113,6 +125,9 @@ class RecordingMicroAllocator : public MicroAllocator { RecordedAllocation recorded_persistent_buffer_data_ = {}; RecordedAllocation recorded_tflite_tensor_variable_buffer_data_ = {}; RecordedAllocation recorded_node_and_registration_array_data_ = {}; +#ifdef USE_TFLM_COMPRESSION + RecordedAllocation recorded_compression_data_ = {}; +#endif // USE_TFLM_COMPRESSION // TODO(b/187993291): Re-enable OpData allocating tracking. RecordedAllocation recorded_op_data_ = {}; diff --git a/tensorflow/lite/micro/recording_micro_allocator_test.cc b/tensorflow/lite/micro/recording_micro_allocator_test.cc index 9d3a5965de4..121a74c3324 100644 --- a/tensorflow/lite/micro/recording_micro_allocator_test.cc +++ b/tensorflow/lite/micro/recording_micro_allocator_test.cc @@ -1,4 +1,4 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -317,6 +317,72 @@ TF_LITE_MICRO_TEST(TestMultiSubgraphModel) { num_tensors * TF_LITE_EVAL_TENSOR_STRUCT_SIZE); } +#ifdef USE_TFLM_COMPRESSION + +TF_LITE_MICRO_TEST(TestCompressedModel) { + tflite::ScratchBufferHandle* scratch_buffer_handles = nullptr; + tflite::testing::TestingOpResolver ops_resolver; + const tflite::Model* model = tflite::testing::GetSimpleMockModelCompressed(); + const int arena_size = 2048; + + uint8_t arena[arena_size]; + + tflite::RecordingMicroAllocator* micro_allocator = + tflite::RecordingMicroAllocator::Create(arena, arena_size); + TF_LITE_MICRO_EXPECT(micro_allocator != nullptr); + TF_LITE_MICRO_CHECK_FAIL(); + + tflite::SubgraphAllocations* subgraph_allocations = + micro_allocator->StartModelAllocation(model); + TF_LITE_MICRO_EXPECT(nullptr != subgraph_allocations); + TF_LITE_MICRO_CHECK_FAIL(); + + TfLiteStatus status = micro_allocator->FinishModelAllocation( + model, subgraph_allocations, &scratch_buffer_handles); + TF_LITE_MICRO_EXPECT_EQ(status, kTfLiteOk); + TF_LITE_MICRO_CHECK_FAIL(); + + micro_allocator->PrintAllocations(); + + size_t count_compression_allocations = 0; + size_t size_compression_allocations = 0; + for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size(); + subgraph_idx++) { + const tflite::CompressionTensorData** ctl = + subgraph_allocations[subgraph_idx].compressed.tensors; + if (ctl == nullptr) { + continue; + } + const tflite::SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx); + const size_t num_tensors = subgraph->tensors()->size(); + for (size_t i = 0; i < num_tensors; i++) { + if (ctl[i] != nullptr) { + count_compression_allocations++; + size_compression_allocations += sizeof(tflite::CompressionTensorData); + count_compression_allocations++; + size_compression_allocations += sizeof(tflite::LookupTableData); + } + } + // Add the CompressionTensorData array + count_compression_allocations++; + size_compression_allocations += + num_tensors * sizeof(tflite::CompressionTensorData*); + } + + tflite::RecordedAllocation recorded_allocation = + micro_allocator->GetRecordedAllocation( + tflite::RecordedAllocationType::kCompressionData); + + TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.count, + count_compression_allocations); + TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.requested_bytes, + size_compression_allocations); + TF_LITE_MICRO_EXPECT_GE(recorded_allocation.used_bytes, + size_compression_allocations); +} + +#endif // USE_TFLM_COMPRESSION + // TODO(b/158124094): Find a way to audit OpData allocations on // cross-architectures. diff --git a/tensorflow/lite/micro/test_helper_custom_ops.cc b/tensorflow/lite/micro/test_helper_custom_ops.cc index 374aabcc9df..97577699961 100644 --- a/tensorflow/lite/micro/test_helper_custom_ops.cc +++ b/tensorflow/lite/micro/test_helper_custom_ops.cc @@ -1,4 +1,4 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -35,6 +35,18 @@ limitations under the License. 
namespace tflite { namespace testing { +namespace { + +template +void BroadcastAdd(const T input_scalar, const T* weights, T* output, + const size_t count) { + for (size_t i = 0; i < count; i++) { + output[i] = input_scalar + weights[i]; + } +} + +} // namespace + const TFLMRegistration* PackerOp::getRegistration() { return GetMutableRegistration(); } @@ -107,5 +119,180 @@ TfLiteStatus PackerOp::Invoke(TfLiteContext* context, TfLiteNode* node) { bool PackerOp::freed_ = false; +const TFLMRegistration* BroadcastAddOp::getRegistration() { + return GetMutableRegistration(); +} + +TFLMRegistration* BroadcastAddOp::GetMutableRegistration() { + static TFLMRegistration r; + r.init = Init; + r.prepare = Prepare; + r.invoke = Invoke; + return &r; +} + +void* BroadcastAddOp::Init(TfLiteContext* context, const char* buffer, + size_t length) { +#ifdef USE_TFLM_COMPRESSION + + weight_scratch_index_ = -1; + +#endif // USE_TFLM_COMPRESSION + + // Do nothing. + return nullptr; +} + +TfLiteStatus BroadcastAddOp::Prepare(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* weights = micro_context->AllocateTempInputTensor(node, 1); + TF_LITE_ENSURE(context, weights != nullptr); + TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0); + TF_LITE_ENSURE(context, output != nullptr); + + TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); + TF_LITE_ENSURE_TYPES_EQ(context, input->type, weights->type); + TF_LITE_ENSURE( + context, input->type == kTfLiteFloat32 || input->type == kTfLiteInt8 || + input->type == kTfLiteInt16 || input->type == kTfLiteInt32 || + input->type == kTfLiteInt64); + TF_LITE_ENSURE(context, input->quantization.type == kTfLiteNoQuantization); + TF_LITE_ENSURE(context, weights->quantization.type == kTfLiteNoQuantization); + TF_LITE_ENSURE(context, output->quantization.type == kTfLiteNoQuantization); + TF_LITE_ENSURE(context, + ElementCount(*weights->dims) == ElementCount(*output->dims)); + TF_LITE_ENSURE(context, ElementCount(*input->dims) == 1); + TF_LITE_ENSURE(context, input->dims->size == 1); + TF_LITE_ENSURE(context, weights->dims->size == 1); + +#ifdef USE_TFLM_COMPRESSION + + // Compression scratch buffers. + // These will only be allocated if the tensor is compressed. 
+ weight_scratch_index_ = + micro_context->AllocateDecompressionScratchBuffer(node, 1); + if (micro_context->IsTensorCompressed(node, 1)) { + TF_LITE_ENSURE(context, weight_scratch_index_ != -1); + } else { + TF_LITE_ENSURE(context, weight_scratch_index_ == -1); + } + +#endif // USE_TFLM_COMPRESSION + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(weights); + micro_context->DeallocateTempTfLiteTensor(output); + + return kTfLiteOk; +} + +TfLiteStatus BroadcastAddOp::Invoke(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0); + TF_LITE_ENSURE(context, input != nullptr); + const TfLiteEvalTensor* weights = + tflite::micro::GetEvalInput(context, node, 1); + TF_LITE_ENSURE(context, weights != nullptr); + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); + TF_LITE_ENSURE(context, output != nullptr); + +#ifdef USE_TFLM_COMPRESSION + + MicroContext* micro_context = GetMicroContext(context); + + const CompressionTensorData* weights_comp_td = + micro_context->GetTensorCompressionData(node, 1); + if (micro_context->IsTensorCompressed(node, 1)) { + TF_LITE_ENSURE(context, weights_comp_td != nullptr); + } else { + TF_LITE_ENSURE(context, weights_comp_td == nullptr); + } + +#endif // USE_TFLM_COMPRESSION + + switch (input->type) { + case kTfLiteFloat32: { + BroadcastAdd( + tflite::micro::GetTensorData(input)[0], +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData( + micro_context, weights, weights_comp_td, weight_scratch_index_), +#else // USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(weights), +#endif // USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(output), + ElementCount(*output->dims)); + } break; + + case kTfLiteInt8: { + BroadcastAdd( + tflite::micro::GetTensorData(input)[0], +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData( + micro_context, weights, weights_comp_td, weight_scratch_index_), +#else // USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(weights), +#endif // USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(output), + ElementCount(*output->dims)); + } break; + + case kTfLiteInt16: { + BroadcastAdd( + tflite::micro::GetTensorData(input)[0], +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData( + micro_context, weights, weights_comp_td, weight_scratch_index_), +#else // USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(weights), +#endif // USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(output), + ElementCount(*output->dims)); + } break; + + case kTfLiteInt32: { + BroadcastAdd( + tflite::micro::GetTensorData(input)[0], +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData( + micro_context, weights, weights_comp_td, weight_scratch_index_), +#else // USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(weights), +#endif // USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(output), + ElementCount(*output->dims)); + } break; + + case kTfLiteInt64: { + BroadcastAdd( + tflite::micro::GetTensorData(input)[0], +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData( + micro_context, weights, weights_comp_td, weight_scratch_index_), +#else // USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(weights), +#endif // USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(output), + ElementCount(*output->dims)); + } break; + + default: { + MicroPrintf("Input type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; + } + } + + return kTfLiteOk; +} + +#ifdef 
USE_TFLM_COMPRESSION + +int BroadcastAddOp::weight_scratch_index_ = -1; + +#endif // USE_TFLM_COMPRESSION + } // namespace testing } // namespace tflite diff --git a/tensorflow/lite/micro/test_helper_custom_ops.h b/tensorflow/lite/micro/test_helper_custom_ops.h index d28bb4038f1..53a8cc3bdd4 100644 --- a/tensorflow/lite/micro/test_helper_custom_ops.h +++ b/tensorflow/lite/micro/test_helper_custom_ops.h @@ -1,4 +1,4 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -43,6 +43,23 @@ class PackerOp { static bool freed_; }; +// This op optionally supports compressed weights +class BroadcastAddOp { + public: + static const TFLMRegistration* getRegistration(); + static TFLMRegistration* GetMutableRegistration(); + static void* Init(TfLiteContext* context, const char* buffer, size_t length); + static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node); + static TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node); + + private: +#ifdef USE_TFLM_COMPRESSION + + static int weight_scratch_index_; // decompression scratch buffer index + +#endif // USE_TFLM_COMPRESSION +}; + } // namespace testing } // namespace tflite diff --git a/tensorflow/lite/micro/test_helpers.cc b/tensorflow/lite/micro/test_helpers.cc index 3f0f5ec0826..33535ec8664 100644 --- a/tensorflow/lite/micro/test_helpers.cc +++ b/tensorflow/lite/micro/test_helpers.cc @@ -1,4 +1,4 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/lite/micro/test_helpers.h" +#include #include #include #include @@ -33,6 +34,12 @@ limitations under the License. #include "tensorflow/lite/micro/test_helper_custom_ops.h" #include "tensorflow/lite/schema/schema_generated.h" +#ifdef USE_TFLM_COMPRESSION + +#include "tensorflow/lite/micro/compression/metadata_generated.h" + +#endif // USE_TFLM_COMPRESSION + // TODO(b/170464050): Use TFLM test only version of schema_utils. 
namespace tflite { @@ -236,7 +243,7 @@ const Model* ModelBuilder::BuildModel( *builder_, 0, builder_->CreateVector(operator_codes_, next_operator_code_id_), builder_->CreateVector(subgraphs, subgraphs_size), - builder_->CreateString("teset_model"), + builder_->CreateString("test_model"), builder_->CreateVector(buffers, buffer_size), 0, builder_->CreateVector(metadata_, ModelBuilder::nbr_of_metadata_buffers_)); @@ -245,7 +252,7 @@ const Model* ModelBuilder::BuildModel( *builder_, 0, builder_->CreateVector(operator_codes_, next_operator_code_id_), builder_->CreateVector(subgraphs, subgraphs_size), - builder_->CreateString("teset_model"), + builder_->CreateString("test_model"), builder_->CreateVector(buffers, buffer_size)); } @@ -578,6 +585,116 @@ const Model* BuildSimpleMockModel() { return model; } +#ifdef USE_TFLM_COMPRESSION + +const flatbuffers::span BuildLutMetadata(uint tensor_index, + uint value_table_buffer_index, + uint bit_width) { + using flatbuffers::Offset; + namespace compression = tflite::micro::compression; + + flatbuffers::FlatBufferBuilder* builder = BuilderInstance(); + + auto lut_tensor = compression::CreateLutTensor( + *builder, tensor_index, value_table_buffer_index, bit_width); + auto subgraph = compression::CreateSubgraph( + *builder, builder->CreateVector(&lut_tensor, 1)); + auto metadata = compression::CreateMetadata( + *builder, builder->CreateVector(&subgraph, 1)); + compression::FinishMetadataBuffer(*builder, metadata); + return builder->GetBufferSpan(); +} + +const Model* BuildSimpleMockModelCompressed() { + using flatbuffers::Offset; + using flatbuffers::Vector; + using tflite::micro::compression::LutTensor; + constexpr uint kEmptyBuffer = 0; + constexpr uint kMetadataBuffer = 1; + constexpr uint kWeightsBuffer = 2; + constexpr uint kValueTableBuffer = 3; + // constexpr uint kInputTensor = 0; + constexpr uint kWeightsTensor = 1; + // constexpr uint kOutputTensor = 2; + constexpr uint kCompressedBitWidth = 4; + + auto lut_tensors_span = + BuildLutMetadata(kWeightsTensor, kValueTableBuffer, kCompressedBitWidth); + + flatbuffers::FlatBufferBuilder* builder = BuilderInstance(); + + // [1, 2, 3, 4, 5, -1, -2, -3, -4, -5, 1, 2, 3, 4, 5] + const std::initializer_list weights_data = {0x01, 0x23, 0x45, 0x98, + 0x76, 0x01, 0x23, 0x40}; + const std::initializer_list value_table_data = {1, 2, 3, 4, 5, + -1, -5, -4, -3, -2}; + auto value_table_offset = builder->CreateVector(value_table_data).o; + const std::initializer_list> buffers = { + CreateBuffer(*builder), + CreateBuffer(*builder, builder->CreateVector(lut_tensors_span)), + CreateBuffer(*builder, builder->CreateVector(weights_data)), + CreateBuffer(*builder, Offset>(value_table_offset)), + }; + + const std::initializer_list input_shape = {1}; + const std::initializer_list weights_shape = {15}; + const std::initializer_list output_shape = weights_shape; + const std::initializer_list> tensors = { + CreateTensor(*builder, builder->CreateVector(input_shape), + TensorType_INT16, kEmptyBuffer, + builder->CreateString("test_input_tensor"), 0, false), + CreateTensor(*builder, builder->CreateVector(weights_shape), + TensorType_INT16, kWeightsBuffer, + builder->CreateString("test_weight_tensor"), 0, false), + CreateTensor(*builder, builder->CreateVector(output_shape), + TensorType_INT16, kEmptyBuffer, + builder->CreateString("test_output_tensor"), 0, false), + }; + + const std::initializer_list subgraph_inputs = {0}; + const std::initializer_list subgraph_outputs = {2}; + const std::initializer_list operator_inputs = {0, 1}; + 
const std::initializer_list operator_outputs = {2}; + const std::initializer_list> operators = { + CreateOperator(*builder, 0, builder->CreateVector(operator_inputs), + builder->CreateVector(operator_outputs), + BuiltinOptions_NONE), + }; + + const std::initializer_list> subgraphs = { + CreateSubGraph(*builder, builder->CreateVector(tensors), + builder->CreateVector(subgraph_inputs), + builder->CreateVector(subgraph_outputs), + builder->CreateVector(operators), + builder->CreateString("test_subgraph")), + }; + + const std::initializer_list> operator_codes = { + CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, + "broadcast_add_op", + /*version=*/0, BuiltinOperator_CUSTOM), + }; + + const std::initializer_list> metadata = { + CreateMetadata(*builder, + builder->CreateString(kCompressionMetadataString), + kMetadataBuffer), + }; + + const Offset model_offset = CreateModel( + *builder, 0, builder->CreateVector(operator_codes), + builder->CreateVector(subgraphs), builder->CreateString("test_model"), + builder->CreateVector(buffers), 0, builder->CreateVector(metadata)); + + FinishModelBuffer(*builder, model_offset); + void* model_pointer = builder->GetBufferPointer(); + const Model* model = flatbuffers::GetRoot(model_pointer); + + return model; +} + +#endif // USE_TFLM_COMPRESSION + const Model* BuildComplexMockModel() { using flatbuffers::Offset; flatbuffers::FlatBufferBuilder* builder = BuilderInstance(); @@ -1665,6 +1782,8 @@ TfLiteStatus GetTestingOpResolver( op_resolver.AddCustom("no_op", NoOp::GetMutableRegistration())); TF_LITE_ENSURE_STATUS(op_resolver.AddCustom( "custom_packer_op", PackerOp::GetMutableRegistration())); + TF_LITE_ENSURE_STATUS(op_resolver.AddCustom( + "broadcast_add_op", BroadcastAddOp::GetMutableRegistration())); TF_LITE_ENSURE_STATUS(op_resolver.AddIf()); return kTfLiteOk; } @@ -1698,6 +1817,18 @@ const Model* GetSimpleMockModel() { return model; } +#ifdef USE_TFLM_COMPRESSION + +const Model* GetSimpleMockModelCompressed() { + static Model* model = nullptr; + if (!model) { + model = const_cast(BuildSimpleMockModelCompressed()); + } + return model; +} + +#endif // USE_TFLM_COMPRESSION + const Model* GetSimpleMultipleInputsModel() { static Model* model = nullptr; if (!model) { @@ -1890,100 +2021,6 @@ TfLiteFloatArray* FloatArrayFromFloats(const float* floats) { return reinterpret_cast(const_cast(floats)); } -TfLiteTensor CreateQuantizedBiasTensor(const float* data, int16_t* quantized, - TfLiteIntArray* dims, float input_scale, - float weights_scale, bool is_variable) { - float bias_scale = input_scale * weights_scale; - tflite::SymmetricQuantize(data, quantized, ElementCount(*dims), bias_scale); - - // Quantized int16_t tensors always have a zero point of 0, since the range of - // int16_t values is large, and because zero point costs extra cycles during - // processing. - TfLiteTensor result = - CreateQuantizedTensor(quantized, dims, bias_scale, 0, is_variable); - return result; -} - -TfLiteTensor CreateQuantizedBiasTensor(const float* data, int32_t* quantized, - TfLiteIntArray* dims, float input_scale, - float weights_scale, bool is_variable) { - float bias_scale = input_scale * weights_scale; - tflite::SymmetricQuantize(data, quantized, ElementCount(*dims), bias_scale); - - // Quantized int32_t tensors always have a zero point of 0, since the range of - // int32_t values is large, and because zero point costs extra cycles during - // processing. 
- TfLiteTensor result = - CreateQuantizedTensor(quantized, dims, bias_scale, 0, is_variable); - return result; -} - -TfLiteTensor CreateQuantizedBiasTensor(const float* data, - std::int64_t* quantized, - TfLiteIntArray* dims, float input_scale, - float weights_scale, bool is_variable) { - float bias_scale = input_scale * weights_scale; - tflite::SymmetricQuantize(data, quantized, ElementCount(*dims), bias_scale); - - // Quantized int32_t tensors always have a zero point of 0, since the range of - // int32_t values is large, and because zero point costs extra cycles during - // processing. - TfLiteTensor result = - CreateQuantizedTensor(quantized, dims, bias_scale, 0, is_variable); - return result; -} - -// Quantizes int32_t bias tensor with per-channel weights determined by input -// scale multiplied by weight scale for each channel. -template -TfLiteTensor CreatePerChannelQuantizedBiasTensor( - const float* input, T* quantized, TfLiteIntArray* dims, float input_scale, - float* weight_scales, float* scales, int* zero_points, - TfLiteAffineQuantization* affine_quant, int quantized_dimension, - bool is_variable) { - int input_size = ElementCount(*dims); - int num_channels = dims->data[quantized_dimension]; - // First element is reserved for array length - zero_points[0] = num_channels; - scales[0] = static_cast(num_channels); - float* scales_array = &scales[1]; - for (int i = 0; i < num_channels; i++) { - scales_array[i] = input_scale * weight_scales[i]; - zero_points[i + 1] = 0; - } - - SymmetricPerChannelQuantize(input, quantized, input_size, num_channels, - scales_array); - - affine_quant->scale = FloatArrayFromFloats(scales); - affine_quant->zero_point = IntArrayFromInts(zero_points); - affine_quant->quantized_dimension = quantized_dimension; - - TfLiteTensor result = CreateTensor(quantized, dims, is_variable); - result.quantization = {kTfLiteAffineQuantization, affine_quant}; - return result; -} - -TfLiteTensor CreatePerChannelQuantizedBiasTensor( - const float* input, int32_t* quantized, TfLiteIntArray* dims, - float input_scale, float* weight_scales, float* scales, int* zero_points, - TfLiteAffineQuantization* affine_quant, int quantized_dimension, - bool is_variable) { - return CreatePerChannelQuantizedBiasTensor( - input, quantized, dims, input_scale, weight_scales, scales, zero_points, - affine_quant, quantized_dimension, is_variable); -} - -TfLiteTensor CreatePerChannelQuantizedBiasTensor( - const float* input, std::int64_t* quantized, TfLiteIntArray* dims, - float input_scale, float* weight_scales, float* scales, int* zero_points, - TfLiteAffineQuantization* affine_quant, int quantized_dimension, - bool is_variable) { - return CreatePerChannelQuantizedBiasTensor( - input, quantized, dims, input_scale, weight_scales, scales, zero_points, - affine_quant, quantized_dimension, is_variable); -} - TfLiteTensor CreateSymmetricPerChannelQuantizedTensor( const float* input, int8_t* quantized, TfLiteIntArray* dims, float* scales, int* zero_points, TfLiteAffineQuantization* affine_quant, diff --git a/tensorflow/lite/micro/test_helpers.h b/tensorflow/lite/micro/test_helpers.h index 6315b9fecdc..6831b467bc8 100644 --- a/tensorflow/lite/micro/test_helpers.h +++ b/tensorflow/lite/micro/test_helpers.h @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
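The bias helpers deleted from test_helpers.cc above are reintroduced below as header templates, so one definition covers int16_t, int32_t, and int64_t biases. A usage sketch with assumed example values (bias_data, bias_dims, and kBiasElementCount are assumed to be defined by the test):

    int64_t bias_quantized[kBiasElementCount];
    TfLiteTensor bias = tflite::testing::CreateQuantizedBiasTensor(
        bias_data, bias_quantized, bias_dims,
        /*input_scale=*/0.5f, /*weights_scale=*/0.25f);
    // The helper quantizes bias_data with scale 0.5f * 0.25f = 0.125f and
    // zero point 0.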
@@ -31,6 +31,13 @@ limitations under the License. #include "tensorflow/lite/portable_type_to_tflitetype.h" #include "tensorflow/lite/schema/schema_generated.h" +#ifdef USE_TFLM_COMPRESSION + +#include "tensorflow/lite/micro/compression.h" +#include "tensorflow/lite/micro/micro_log.h" + +#endif // TENSORFLOW_LITE_MICRO_TEST_HELPERS_H_ + namespace tflite { namespace testing { @@ -112,6 +119,15 @@ TfLiteStatus GetTestingOpResolver(TestingOpResolver& op_resolver); // 1 layer of weights, 1 output Tensor, and 1 operator. const Model* GetSimpleMockModel(); +#ifdef USE_TFLM_COMPRESSION + +// Returns a simple example flatbuffer TensorFlow Lite model. Contains 1 input, +// 1 layer of weights, 1 output Tensor, and 1 operator (BroadcastAddOp). The +// weights tensor is compressed. +const Model* GetSimpleMockModelCompressed(); + +#endif // USE_TFLM_COMPRESSION + // Returns a flatbuffer TensorFlow Lite model with more inputs, variable // tensors, and operators. const Model* GetComplexMockModel(); @@ -220,8 +236,6 @@ TfLiteTensor CreateTensor(const T* data, TfLiteIntArray* dims, result.is_variable = is_variable; result.allocation_type = kTfLiteMemNone; result.data.data = const_cast(data); - result.bytes = ElementCount(*dims) * sizeof(T); - result.data.data = const_cast(data); if (type == kTfLiteInt4) { result.type = kTfLiteInt4; @@ -233,7 +247,13 @@ TfLiteTensor CreateTensor(const T* data, TfLiteIntArray* dims, // a single CreateTensor method. A Const array should be used for immutable // input tensors and non-const array should be used for mutable and output // tensors. - result.type = typeToTfLiteType(); + if (type == kTfLiteNoType) { + result.type = typeToTfLiteType(); + } else { + result.type = type; + } + + result.bytes = ElementCount(*dims) * TfLiteTypeGetSize(result.type); } return result; } @@ -260,37 +280,95 @@ TfLiteTensor CreateQuantizedTensor(const float* input, T* quantized, type); } -TfLiteTensor CreateQuantizedBiasTensor(const float* data, int16_t* quantized, +template +TfLiteTensor CreateQuantizedBiasTensor(const float* data, T* quantized, TfLiteIntArray* dims, float input_scale, float weights_scale, - bool is_variable = false); + bool is_variable = false) { + float bias_scale = input_scale * weights_scale; + tflite::SymmetricQuantize(data, quantized, ElementCount(*dims), bias_scale); + + // Quantized bias tensors always have a zero point of 0, since the range of + // values is large, and because zero point costs extra cycles during + // processing. + TfLiteTensor result = + CreateQuantizedTensor(quantized, dims, bias_scale, 0, is_variable); + return result; +} -TfLiteTensor CreateQuantizedBiasTensor(const float* data, int32_t* quantized, - TfLiteIntArray* dims, float input_scale, - float weights_scale, - bool is_variable = false); +// Creates bias tensor with input data, and per-channel weights determined by +// input scale multiplied by weight scale for each channel. Input data will not +// be quantized. 
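+// For example (hypothetical values): with input_scale 0.5f and weight scales
+// {0.2f, 0.4f}, the per-channel bias scales become {0.1f, 0.2f} and every
+// zero point is 0.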
+template +TfLiteTensor CreatePerChannelQuantizedBiasTensor( + const T* input_data, TfLiteIntArray* dims, float input_scale, + const TfLiteFloatArray* weight_scales, TfLiteFloatArray* scales, + TfLiteIntArray* zero_points, TfLiteAffineQuantization* affine_quant, + int quantized_dimension, bool is_variable = false, + TfLiteType type = kTfLiteNoType) { + int num_channels = dims->data[quantized_dimension]; + zero_points->size = num_channels; + scales->size = num_channels; + for (int i = 0; i < num_channels; i++) { + scales->data[i] = input_scale * weight_scales->data[i]; + zero_points->data[i] = 0; + } -TfLiteTensor CreateQuantizedBiasTensor(const float* data, - std::int64_t* quantized, - TfLiteIntArray* dims, float input_scale, - float weights_scale, - bool is_variable = false); + affine_quant->scale = scales; + affine_quant->zero_point = zero_points; + affine_quant->quantized_dimension = quantized_dimension; -// Quantizes int32_t bias tensor with per-channel weights determined by input -// scale multiplied by weight scale for each channel. -TfLiteTensor CreatePerChannelQuantizedBiasTensor( - const float* input, int32_t* quantized, TfLiteIntArray* dims, - float input_scale, float* weight_scales, float* scales, int* zero_points, - TfLiteAffineQuantization* affine_quant, int quantized_dimension, - bool is_variable = false); + TfLiteTensor result = CreateTensor(input_data, dims, is_variable, type); + result.quantization = {kTfLiteAffineQuantization, affine_quant}; + return result; +} -// Quantizes int64_t bias tensor with per-channel weights determined by input +// Quantizes bias tensor with per-channel weights determined by input // scale multiplied by weight scale for each channel. +template TfLiteTensor CreatePerChannelQuantizedBiasTensor( - const float* input, std::int64_t* quantized, TfLiteIntArray* dims, - float input_scale, float* weight_scales, float* scales, int* zero_points, + const float* input, T* quantized, TfLiteIntArray* dims, float input_scale, + const float* weight_scales, float* scales, int* zero_points, TfLiteAffineQuantization* affine_quant, int quantized_dimension, - bool is_variable = false); + bool is_variable = false) { + int input_size = ElementCount(*dims); + int num_channels = dims->data[quantized_dimension]; + // First element is reserved for array length + zero_points[0] = num_channels; + scales[0] = static_cast(num_channels); + float* scales_array = &scales[1]; + for (int i = 0; i < num_channels; i++) { + scales_array[i] = input_scale * weight_scales[i]; + zero_points[i + 1] = 0; + } + + SymmetricPerChannelQuantize(input, quantized, input_size, num_channels, + scales_array); + + affine_quant->scale = FloatArrayFromFloats(scales); + affine_quant->zero_point = IntArrayFromInts(zero_points); + affine_quant->quantized_dimension = quantized_dimension; + + TfLiteTensor result = CreateTensor(quantized, dims, is_variable); + result.quantization = {kTfLiteAffineQuantization, affine_quant}; + + return result; +} + +template +TfLiteTensor CreatePerChannelQuantizedTensor( + const T* quantized, TfLiteIntArray* dims, TfLiteFloatArray* scales, + TfLiteIntArray* zero_points, TfLiteAffineQuantization* affine_quant, + int quantized_dimension, bool is_variable = false, + TfLiteType type = kTfLiteNoType) { + affine_quant->scale = scales; + affine_quant->zero_point = zero_points; + affine_quant->quantized_dimension = quantized_dimension; + + TfLiteTensor result = CreateTensor(quantized, dims, is_variable, type); + result.quantization = {kTfLiteAffineQuantization, affine_quant}; + 
return result; +} TfLiteTensor CreateSymmetricPerChannelQuantizedTensor( const float* input, int8_t* quantized, TfLiteIntArray* dims, float* scales, @@ -329,6 +407,128 @@ inline int ZeroPointFromMinMax(const float min, const float max) { static_cast(roundf(-min / ScaleFromMinMax(min, max))); } +#ifdef USE_TFLM_COMPRESSION + +template +struct TestCompressionInfo { + TFILTER* filter_value_table; + size_t filter_value_table_stride; + int filter_bit_width; + TBIAS* bias_value_table; + size_t bias_value_table_stride; + int bias_bit_width; + CompressionScheme scheme; +}; + +template +struct TestCompressionQuantizedInfo : TestCompressionInfo { + const uint8_t* filter_compressed; + const float* filter_data; + const int* filter_dims_data; // TfLiteIntArray + const float* filter_scales; // TfLiteFloatArray + const int* filter_zero_points; // TfLiteIntArray + + const uint8_t* bias_compressed; + const float* bias_data; + const int* bias_dims_data; // TfLiteIntArray + float* bias_scales; // TfLiteFloatArray (computed) + int* bias_zero_points; // TfLiteIntArray (computed) +}; + +template +class TestCompressedList { + public: + TfLiteStatus AddWeight(const TestCompressionInfo& tci, + const TfLiteTensor& tensor, + const size_t tensor_index) { + filter_comp_data_.data.lut_data = &filter_lut_; + filter_comp_data_.scheme = tci.scheme; + filter_comp_data_.data.lut_data->compressed_bit_width = + tci.filter_bit_width; + filter_comp_data_.data.lut_data->value_table = tci.filter_value_table; + filter_comp_data_.data.lut_data->value_table_channel_stride = + tci.filter_value_table_stride; + filter_comp_data_.data.lut_data->is_per_channel_quantized = + IsPerChannelQuantized(tensor); + filter_comp_data_.data.lut_data->use_alternate_axis = UsesAltAxis(tensor); + return SetCompressionData(tensor_index, filter_comp_data_); + } + + TfLiteStatus AddBias(const TestCompressionInfo& tci, + const TfLiteTensor& tensor, const size_t tensor_index) { + bias_comp_data_.data.lut_data = &bias_lut_; + bias_comp_data_.scheme = tci.scheme; + bias_comp_data_.data.lut_data->compressed_bit_width = tci.bias_bit_width; + bias_comp_data_.data.lut_data->value_table = tci.bias_value_table; + bias_comp_data_.data.lut_data->value_table_channel_stride = + tci.bias_value_table_stride; + bias_comp_data_.data.lut_data->is_per_channel_quantized = + IsPerChannelQuantized(tensor); + bias_comp_data_.data.lut_data->use_alternate_axis = UsesAltAxis(tensor); + return SetCompressionData(tensor_index, bias_comp_data_); + } + const CompressedTensorList* GetCompressedTensorList() { return &ctl_; } + + private: + LookupTableData filter_lut_ = {}; + CompressionTensorData filter_comp_data_ = {}; + LookupTableData bias_lut_ = {}; + CompressionTensorData bias_comp_data_ = {}; + const CompressionTensorData* ctdp_[N] = {}; + const CompressedTensorList ctl_ = {ctdp_}; + + TfLiteStatus SetCompressionData(const size_t tensor_index, + const CompressionTensorData& cd) { + TFLITE_DCHECK_LT(tensor_index, N); + TFLITE_DCHECK(cd.data.lut_data->value_table != nullptr); + TFLITE_DCHECK(cd.data.lut_data->value_table_channel_stride != 0); + + if (cd.scheme != CompressionScheme::kBinQuant) { + MicroPrintf("TestCompressedList: unsupported compression scheme"); + return kTfLiteError; + } + if (ctdp_[tensor_index] != nullptr) { + MicroPrintf("TestCompressedList: tensor index %d already in use", + tensor_index); + return kTfLiteError; + } + + ctdp_[tensor_index] = &cd; + return kTfLiteOk; + } + + bool IsPerChannelQuantized(const TfLiteTensor& tensor) { + if (tensor.quantization.type 
diff --git a/tensorflow/lite/micro/tools/benchmarking/metrics.cc b/tensorflow/lite/micro/tools/benchmarking/metrics.cc
index 3a4bf7e4917..f71a4cd139e 100644
--- a/tensorflow/lite/micro/tools/benchmarking/metrics.cc
+++ b/tensorflow/lite/micro/tools/benchmarking/metrics.cc
@@ -1,4 +1,4 @@
-/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -46,7 +46,8 @@ struct LogAllocationRecord {
 
 constexpr int kArenaRows = 3;
 constexpr int kArenaColumns = 3;
-constexpr int kAllocationTypes = 7;
+constexpr int kAllocationTypes =
+    static_cast<int>(tflite::RecordedAllocationType::kNumAllocationTypes);
 constexpr int kAllocationColumns = 6;
 constexpr int kMaxBufSize = 100;
 
@@ -85,16 +86,25 @@ LogAllocationRecord GetLogAllocationRecord(
       tflite::RecordedAllocationType::kPersistentBufferData,
       tflite::RecordedAllocationType::kTfLiteTensorVariableBufferData,
       tflite::RecordedAllocationType::kNodeAndRegistrationArray,
-      tflite::RecordedAllocationType::kOpData};
+      tflite::RecordedAllocationType::kOpData,
+#ifdef USE_TFLM_COMPRESSION
+      tflite::RecordedAllocationType::kCompressionData,
+#endif  // USE_TFLM_COMPRESSION
+  };
   static_assert(std::extent<decltype(types)>::value == kAllocationTypes,
                 "kAllocationTypes mismatch");
-  const char* titles[] = {"Eval tensor data",
-                          "Persistent tensor data",
-                          "Persistent quantization data",
-                          "Persistent buffer data",
-                          "Tensor variable buffer data",
-                          "Node and registration array",
-                          "Operation data"};
+  const char* titles[] = {
+      "Eval tensor data",
+      "Persistent tensor data",
+      "Persistent quantization data",
+      "Persistent buffer data",
+      "Tensor variable buffer data",
+      "Node and registration array",
+      "Operation data",
+#ifdef USE_TFLM_COMPRESSION
+      "Compression data",
+#endif  // USE_TFLM_COMPRESSION
+  };
   static_assert(std::extent<decltype(titles)>::value == kAllocationTypes,
                 "kAllocationTypes mismatch");
   const size_t total_bytes =
diff --git a/tensorflow/lite/micro/tools/ci_build/test_x86_default.sh b/tensorflow/lite/micro/tools/ci_build/test_x86_default.sh
index 998827f24de..f5392dddeec 100755
--- a/tensorflow/lite/micro/tools/ci_build/test_x86_default.sh
+++ b/tensorflow/lite/micro/tools/ci_build/test_x86_default.sh
@@ -41,6 +41,12 @@ readable_run make -s -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/M
 readable_run make -s -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile test TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR}
 readable_run make -s -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile integration_tests TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR}
 
+# optional TFLM tensor compression - execute the unit tests
+readable_run make -s -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile test \
+  TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \
+  EXTERNAL_DIR=${EXTERNAL_DIR} \
+  USE_TFLM_COMPRESSION=yes
+
 # run generic benchmark
 readable_run make -j$(nproc) -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile \
   TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \
diff --git a/tensorflow/lite/micro/tools/make/Makefile b/tensorflow/lite/micro/tools/make/Makefile
index 3bf2b549316..e837e9d33b9 100644
--- a/tensorflow/lite/micro/tools/make/Makefile
+++ b/tensorflow/lite/micro/tools/make/Makefile
@@ -167,6 +167,7 @@ endif
 COMMON_FLAGS := \
   -Werror \
   -fno-unwind-tables \
+  -fno-asynchronous-unwind-tables \
   -ffunction-sections \
   -fdata-sections \
   -fmessage-length=0 \
@@ -263,6 +264,17 @@ endif
 # runtime that can be linked in to other programs.
 MICROLITE_LIB_NAME := libtensorflow-microlite.a
 
+# TFLM optional compression support (default disabled)
+ENABLE_COMPRESSION := no
+ifneq ($(USE_TFLM_COMPRESSION),)
+  # currently only Linux targets supported
+  ifeq ($(TARGET), $(filter $(TARGET), linux))
+    CXXFLAGS += -DUSE_TFLM_COMPRESSION
+    CCFLAGS += -DUSE_TFLM_COMPRESSION
+    ENABLE_COMPRESSION := yes
+  endif
+endif
+
 # Where compiled objects are stored.
 BASE_GENDIR := gen
 GENDIR := $(BASE_GENDIR)/$(TARGET)_$(TARGET_ARCH)_$(BUILD_TYPE)
@@ -272,6 +284,9 @@ endif
 ifneq ($(CO_PROCESSOR),)
 GENDIR := $(GENDIR)_$(CO_PROCESSOR)
 endif
+ifeq ($(ENABLE_COMPRESSION), yes)
+  GENDIR := $(GENDIR)_compression
+endif
 GENDIR := $(GENDIR)_$(TOOLCHAIN)/
 
 CORE_OBJDIR := $(GENDIR)obj/core/
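(Reviewer note, not part of the patch: with the Makefile change above, passing any non-empty USE_TFLM_COMPRESSION on a Linux target adds -DUSE_TFLM_COMPRESSION to CXXFLAGS and CCFLAGS and sets ENABLE_COMPRESSION to yes, which appends a _compression suffix to GENDIR so compressed and non-compressed objects land in separate trees. Assuming the default BUILD_TYPE and gcc toolchain on x86-64 Linux, the output directory would look something like gen/linux_x86_64_default_compression_gcc/; the exact path depends on TARGET, TARGET_ARCH, BUILD_TYPE, and TOOLCHAIN. The test_x86_default.sh change exercises this configuration by re-running the unit-test target with USE_TFLM_COMPRESSION=yes.)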