Skip to content

Commit

Permalink
testing
Browse files Browse the repository at this point in the history
  • Loading branch information
ddavis-2015 committed Aug 8, 2024
1 parent d3475aa commit 26a153e
Show file tree
Hide file tree
Showing 40 changed files with 3,887 additions and 382 deletions.
2 changes: 1 addition & 1 deletion tensorflow/lite/micro/compression.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ struct CompressedTensorList {
// Sparsely populated array with the same number of elements as there are
// tensors in the Subgraph. An alternative would include a tensor index in
// the struct for each and walk the list on look up. This could be slow.
CompressionTensorData** tensors;
const CompressionTensorData** tensors;
};

} // namespace tflite
Expand Down
94 changes: 94 additions & 0 deletions tensorflow/lite/micro/compression/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
load("@flatbuffers//:build_defs.bzl", "flatbuffer_cc_library", "flatbuffer_py_library")
load("@rules_python//python:defs.bzl", "py_test")
load("@tflm_pip_deps//:requirements.bzl", "requirement")

# Every target declared in this package is visible to all other packages.
package(
    default_visibility = ["//visibility:public"],
)

# C++ flatbuffer library built from the compression metadata schema.
flatbuffer_cc_library(
    name = "metadata_flatbuffer_cc",
    srcs = ["metadata.fbs"],
)

# Python flatbuffer library built from original.fbs (exercised by
# :original_test_py).
flatbuffer_py_library(
    name = "original_flatbuffer_py",
    srcs = ["original.fbs"],
)

# Python flatbuffer library built from the compression metadata schema; used by
# the :compress and :view tools and by :metadata_test_py.
flatbuffer_py_library(
    name = "metadata_flatbuffer_py",
    srcs = ["metadata.fbs"],
)

# C++ test that depends on the metadata flatbuffer library.
cc_test(
    name = "metadata_test_cc",
    size = "small",
    srcs = ["metadata_test.cc"],
    deps = [
        "metadata_flatbuffer_cc",
        "//tensorflow/lite/micro:hexdump",
        "@flatbuffers//:runtime_cc",
    ],
)

# Command-line tool that writes a LUT-compressed copy of a .tflite model.
# The pip requirements below match the third-party imports in compress.py
# (bitarray, numpy, sklearn).
py_binary(
    name = "compress",
    srcs = ["compress.py"],
    deps = [
        "@absl_py//absl:app",
        "@absl_py//absl/flags",
        "@absl_py//absl/logging",
        "@flatbuffers//:runtime_py",
        "metadata_flatbuffer_py",
        "//tensorflow/lite/python:schema_py",
        requirement("bitarray"),
        requirement("numpy"),
        requirement("scikit-learn"),
    ],
)

# Companion tool built from view.py; it depends on both the compression
# metadata schema and the TFLite model schema.
py_binary(
    name = "view",
    srcs = [
        "view.py",
    ],
    deps = [
        "metadata_flatbuffer_py",
        "//tensorflow/lite/python:schema_py",
    ],
)

# Python test that depends on the metadata flatbuffer library.
py_test(
    name = "metadata_test_py",
    size = "small",
    srcs = ["metadata_test.py"],
    main = "metadata_test.py",
    deps = [
        "metadata_flatbuffer_py",
        "@flatbuffers//:runtime_py",
        requirement("hexdump"),
    ],
)

# Python test that depends on the flatbuffer library built from original.fbs.
py_test(
    name = "original_test_py",
    size = "small",
    srcs = ["original_test.py"],
    main = "original_test.py",
    deps = [
        "original_flatbuffer_py",
        "@flatbuffers//:runtime_py",
        requirement("hexdump"),
    ],
)

# Runs the :compress tool on the hello_world int8 example model. `$<` expands
# to the single input in `srcs` and `$@` to the single output in `outs`.
genrule(
    name = "hello_world_int8.compressed",
    srcs = ["//tensorflow/lite/micro/examples/hello_world/models:hello_world_int8.tflite"],
    outs = ["hello_world_int8.compressed.tflite"],
    cmd = "$(location :compress) --input_model_path $< --output_model_path $@",
    tools = [":compress"],
)
244 changes: 244 additions & 0 deletions tensorflow/lite/micro/compression/compress.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
# Copyright 2024 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Reduces the number of weights in a .tflite model using various strategies."""

# Usage information:
# Default:
#   `bazel run tensorflow/lite/micro/compression:compress -- \
#       --input_model_path=</path/to/my_model.tflite> \
#       --output_model_path=</path/to/output.tflite>`


from tensorflow.lite.micro.compression import metadata_flatbuffer_py_generated as compression_schema
from tensorflow.lite.python import schema_py_generated as tflite_schema

from absl import app
from absl import flags
from absl import logging
import bitarray
import bitarray.util
import numpy as np
import flatbuffers
import sklearn.cluster
import struct


# Path of the .tflite model to compress. This is the only mandatory flag.
_INPUT_MODEL_PATH = flags.DEFINE_string(
    name="input_model_path",
    default=None,
    help=".tflite input model path",
    required=True,
)

# NOTE(review): this flag is declared but is not read anywhere in the code
# shown for this file.
_TEST_COMPRESSED_MODEL = flags.DEFINE_bool(
    name="test_compressed_model",
    default=False,
    help=("optional config to test models with random data and"
          " report on the differences in output."),
)

# Destination of the compressed model. When left unset, main() derives a path
# from the input path.
_OUTPUT_MODEL_PATH = flags.DEFINE_string(
    name="output_model_path",
    default=None,
    help=".tflite output path. Leave blank if same as input+.compressed.tflite",
)


def read_model(path):
  """Load the .tflite file at `path` and return it as an unpacked model.

  The whole file is read into memory and handed to
  `tflite_schema.ModelT.InitFromPackedBuf`, whose result is returned.
  """
  with open(path, 'rb') as model_file:
    packed = bytearray(model_file.read())
  return tflite_schema.ModelT.InitFromPackedBuf(packed, 0)


def write_model(model, path):
  """Serialize `model` with a flatbuffer builder and write it to `path`.

  Parameters:
    model: an object exposing `Pack(builder)`, e.g. the result of read_model()
    path: destination file; it is opened in binary write mode
  """
  fbb = flatbuffers.Builder(32)
  fbb.Finish(model.Pack(fbb))
  serialized: bytearray = fbb.Output()

  with open(path, 'wb') as out_file:
    out_file.write(serialized)


def pack_compression_metadata(m):
  """Serialize the metadata object `m` and return the resulting buffer.

  `m` must expose `Pack(builder)`; compress() returns such an object.
  """
  fbb = flatbuffers.Builder(32)
  fbb.Finish(m.Pack(fbb))
  packed: bytearray = fbb.Output()
  return packed


def pack_lut_indexes(indexes, bitwidth):
  """Pack the integers in `indexes` back to back into `bitwidth`-wide fields.

  Fields are written in big-endian bit order, in the order the indexes are
  given. Returns the packed fields as bytes. Raises an OverflowError if any
  element does not fit into a bitwidth-wide field.
  """
  packed = bitarray.bitarray(endian="big")
  for index in indexes:
    packed += bitarray.util.int2ba(index, length=bitwidth, endian="big")
  return packed.tobytes()


def pack_lut_values(values, struct_format):
  """Encode `values` one after another into a single bytearray.

  `struct_format` is a format character of the standard `struct` module and
  selects the type, and hence the byte encoding, of every value. The byte
  order is always little-endian.
  """
  packer = struct.Struct("<" + struct_format)
  return bytearray(b"".join(packer.pack(value) for value in values))


def unpack_buffer_values(data, struct_format):
  """Decode `data` into a list of values; the inverse of pack_lut_values().

  `data` is converted with `bytes()` and read as consecutive little-endian
  items of the type named by the `struct` format character `struct_format`.
  """
  unpacker = struct.Struct("<" + struct_format)
  values = []
  for fields in unpacker.iter_unpack(bytes(data)):
    # iter_unpack yields one tuple per item; the value is its first field.
    values.append(fields[0])
  return values


def tensor_type_to_struct_format(type):
  """Return the `struct` format character for a tflite tensor type.

  Only INT8, INT16 and FLOAT32 are mapped; any other type raises KeyError.
  """
  tensor_types = tflite_schema.TensorType
  formats = {
      tensor_types.INT8: "b",
      tensor_types.INT16: "h",
      tensor_types.FLOAT32: "f",
  }
  return formats[type]


def bq(sequence, num_values):
  """Quantize a sequence of integers, minimizing the total error using k-means
  clustering.

  Parameters:
    sequence :list - a sequence of integers to be quantized
    num_values :int - the number of quantization levels
  Returns:
    (indexes, values): a tuple with the list of indexes and list of values.
    `values` holds the cluster centers rounded to integers, and `indexes`
    holds, for each element of `sequence`, the cluster predicted for it.
  """
  # KMeans expects a 2-D array: one row per sample, a single feature column.
  samples = np.array(sequence).reshape(-1, 1)
  # A fixed random_state keeps the clustering reproducible between runs.
  clusterer = sklearn.cluster.KMeans(n_clusters=num_values, random_state=0)
  clusterer.fit(samples)

  # NOTE(review): centers are rounded and cast to int, so a non-integer input
  # sequence would lose its fractional part here.
  centers = clusterer.cluster_centers_.flatten()
  values = np.round(centers).astype(int).tolist()
  indexes = clusterer.predict(samples).tolist()
  return (indexes, values)


def compress_tensor(subgraph_id, tensor_id, model, bitwidth=2):
  """Create a look-up-table (LUT) encoding of one tensor.

  The tensor's values are quantized to `2 ** bitwidth` levels with bq(). Two
  new buffers are appended to `model.buffers`: first the packed LUT indexes,
  then the LUT values. The tensor's original buffer is left in place.

  Parameters:
    subgraph_id :int - index of the subgraph containing the tensor
    tensor_id :int - index of the tensor within that subgraph
    model - the unpacked model; its `buffers` list is modified in place
    bitwidth :int - width in bits of each LUT index; defaults to 2, the value
      that used to be hard-coded
  Returns:
    a `compression_schema.LutTensorT` describing where the LUT buffers are
  Raises:
    ValueError: if `bitwidth` is less than 1
    KeyError: if the tensor's type has no known struct format
  """
  if bitwidth < 1:
    raise ValueError(f"bitwidth must be at least 1, got {bitwidth}")

  tensor = model.subgraphs[subgraph_id].tensors[tensor_id]
  struct_format = tensor_type_to_struct_format(tensor.type)
  original = model.buffers[tensor.buffer]
  sequence = unpack_buffer_values(original.data, struct_format)
  indexes, values = bq(sequence, 2**bitwidth)

  # Append the index buffer; its id is the position it is about to occupy.
  index_buffer = tflite_schema.BufferT()
  index_buffer.data = pack_lut_indexes(indexes, bitwidth)
  index_id = len(model.buffers)
  model.buffers.append(index_buffer)

  # Append the value buffer, encoded with the tensor's own element type.
  value_buffer = tflite_schema.BufferT()
  value_buffer.data = pack_lut_values(values, struct_format)
  value_id = len(model.buffers)
  model.buffers.append(value_buffer)

  # Describe the encoding so that a reader can locate both buffers.
  lut_tensor = compression_schema.LutTensorT()
  lut_tensor.subgraph = subgraph_id
  lut_tensor.tensor = tensor_id
  lut_tensor.indexBitwidth = bitwidth
  lut_tensor.indexBuffer = index_id
  lut_tensor.valueBuffer = value_id

  return lut_tensor


def compress_fully_connected(subgraph_id, operator_id, model):
  """Compress the second input tensor of a fully_connected operator.

  NOTE(review): input 1 is presumably the weights tensor under the usual
  TFLite input ordering; confirm against the operator definition.

  Returns a one-element tuple holding the LutTensorT for that tensor.
  """
  operator = model.subgraphs[subgraph_id].operators[operator_id]
  second_input = operator.inputs[1]
  # TODO: the third input (operator.inputs[2]) is not compressed yet.
  return (compress_tensor(subgraph_id, second_input, model),)


def get_opcode_compressions(model):
  """Return a map of operator_code indexes to compression functions, for those
  operators we wish to and know how to compress.

  The keys are positions in `model.operatorCodes`, which is what each
  operator's `opcodeIndex` refers to in compress().
  """
  compressable = {
      tflite_schema.BuiltinOperator.FULLY_CONNECTED: compress_fully_connected,
  }
  compressions = {}
  for index, code in enumerate(model.operatorCodes or ()):
    # Older models store the operator only in the deprecated 8-bit field and
    # leave `builtinCode` at its default, so take the larger of the two; this
    # mirrors how the TFLite runtime resolves the builtin code.
    deprecated_code = getattr(code, "deprecatedBuiltinCode", 0) or 0
    builtin_code = max(code.builtinCode or 0, deprecated_code)
    fn = compressable.get(builtin_code)
    if fn is not None:
      compressions[index] = fn
  return compressions


def compress(model):
  """Compress every supported operator in `model` and return the metadata.

  Steps:
    1. Map operator-code indexes to compression functions.
    2. Visit every operator of every subgraph, in order, and call the function
       registered for its operator code, if any. Those functions append LUT
       buffers to `model.buffers` as a side effect.
    3. Collect the returned LUT tensor descriptions into a
       `compression_schema.MetadataT`.
  """
  compressions = get_opcode_compressions(model)

  lut_tensors = []
  for subgraph_id, subgraph in enumerate(model.subgraphs):
    for operator_id, operator in enumerate(subgraph.operators):
      compress_fn = compressions.get(operator.opcodeIndex)
      if compress_fn is None:
        continue
      produced = compress_fn(subgraph_id, operator_id, model)
      if produced is None:
        continue
      lut_tensors += produced

  compression_metadata = compression_schema.MetadataT()
  compression_metadata.lutTensors = lut_tensors
  return compression_metadata


def main(_) -> None:
  """Compress the input model and write the result.

  Reads the model named by --input_model_path, compresses it, stores the
  packed compression metadata in a new buffer referenced by a metadata entry
  named "COMPRESSION_METADATA", and writes the model to --output_model_path.
  When no output path is given, the input path is used with a trailing
  ".tflite" replaced by ".compressed.tflite".
  """
  input_model_path = _INPUT_MODEL_PATH.value
  output_model_path = _OUTPUT_MODEL_PATH.value
  if not output_model_path:
    # Strip only a trailing ".tflite"; splitting on the first occurrence would
    # truncate paths that contain ".tflite" elsewhere, e.g. in a directory.
    suffix = ".tflite"
    stem = input_model_path
    if stem.endswith(suffix):
      stem = stem[:-len(suffix)]
    output_model_path = stem + ".compressed.tflite"
  logging.info("compressing %s to %s", input_model_path, output_model_path)

  model = read_model(input_model_path)

  compression_metadata = compress(model)

  buffer = tflite_schema.BufferT()
  buffer.data = pack_compression_metadata(compression_metadata)
  model.buffers.append(buffer)

  # NOTE(review): the header comment of metadata.fbs names the key
  # "TFLM_COMPRESSION"; confirm which key readers of the model expect.
  metadata = tflite_schema.MetadataT()
  metadata.name = "COMPRESSION_METADATA"
  metadata.buffer = len(model.buffers) - 1
  # A model without a metadata table unpacks with `metadata` set to None.
  if model.metadata is None:
    model.metadata = []
  model.metadata.append(metadata)

  write_model(model, output_model_path)


# Script entry point: hand control to absl, which runs main().
if __name__ == "__main__":
  app.run(main)
38 changes: 38 additions & 0 deletions tensorflow/lite/micro/compression/metadata.fbs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Copyright 2024 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Flatbuffer schema describing a TFLM compressed model. Use as the value for
// the key "TFLM_COMPRESSION" in the metadata table in a .tflite flatbuffer.

namespace tflite.micro.compression;

// Root table of the compression metadata. compress.py serializes one of these
// into a buffer of the model.
table Metadata {
  lut_tensors:[LutTensor]; // list of tensors that are compressed by LUT
}

// Describes one LUT-compressed tensor: which tensor it is, and which model
// buffers hold its indexes and values. All indexes are 16-bit, so subgraph,
// tensor and buffer indexes above 65535 cannot be represented.
struct LutTensor {
  subgraph:uint16; // the index of the subgraph
  tensor:uint16; // the index of the tensor in its subgraph
  index_bitwidth:uint8; // the bit-width of LUT indexes
  index_buffer:uint16; // the index of the buffer containing LUT indexes
  value_buffer:uint16; // the index of the buffer containing LUT values
}
// Look-Up-Table tensors are encoded in two buffers: an index buffer and a
// value buffer. The indexes are unsigned integers packed into the index buffer
// in bitwidth-wide bit fields with a big-endian bit order. The data in the
// value buffer is encoded as usual according to the type of the tensor.
// Tensors with multiple channels have distinct values tables for each channel,
// concatenated into one value buffer. (Will elaborate this comment.)

root_type Metadata;
Loading

0 comments on commit 26a153e

Please sign in to comment.