diff --git a/package.js b/package.js
index 5684a56d5ce..87d736902a4 100644
--- a/package.js
+++ b/package.js
@@ -519,6 +519,7 @@ const update = async () => {
         'coreml',
         'dlc',
         'dnn',
+        'ggml',
         'keras',
         'mnn',
         'mslite',
diff --git a/source/base.js b/source/base.js
index 625bcd4d82e..8bc7081a037 100644
--- a/source/base.js
+++ b/source/base.js
@@ -930,6 +930,102 @@ base.BinaryReader = class {
     }
 };
 
+// Little-endian primitive reader layered over a stream object. Multi-byte
+// values are copied into an 8 byte scratch buffer and decoded with a DataView.
+base.StreamReader = class {
+
+    // stream must provide position, seek(), skip(), stream(), read() and byte().
+    constructor(stream) {
+        this._stream = stream;
+        this._buffer = new Uint8Array(8);
+        this._view = new DataView(this._buffer.buffer, this._buffer.byteOffset, this._buffer.byteLength);
+    }
+
+    get position() {
+        return this._stream.position;
+    }
+
+    seek(position) {
+        this._stream.seek(position);
+    }
+
+    // Forwards to the stream's skip(); the GGUF reader passes a byte count here.
+    skip(position) {
+        this._stream.skip(position);
+    }
+
+    // Forwards to the stream's stream(length); the GGUF reader uses it for tensor data.
+    stream(length) {
+        return this._stream.stream(length);
+    }
+
+    read(length) {
+        return this._stream.read(length);
+    }
+
+    byte() {
+        return this._stream.byte();
+    }
+
+    int8() {
+        return this._decode(1).getInt8(0);
+    }
+
+    int16() {
+        return this._decode(2).getInt16(0, true);
+    }
+
+    int32() {
+        return this._decode(4).getInt32(0, true);
+    }
+
+    // Signed 64-bit value as a Number; throws when it is not a safe integer.
+    int64() {
+        const view = this._decode(8);
+        const value = (view.getInt32(4, true) * 4294967296) + view.getUint32(0, true);
+        if (Number.isSafeInteger(value)) {
+            return value;
+        }
+        throw new Error("Signed 64-bit value exceeds safe integer.");
+    }
+
+    uint16() {
+        return this._decode(2).getUint16(0, true);
+    }
+
+    uint32() {
+        return this._decode(4).getUint32(0, true);
+    }
+
+    // Unsigned 64-bit value as a Number; throws when it is not a safe integer.
+    uint64() {
+        const low = this.uint32();
+        const high = this.uint32();
+        if (high === 0) {
+            return low;
+        }
+        const value = (high * 4294967296) + low;
+        if (Number.isSafeInteger(value)) {
+            return value;
+        }
+        throw new Error("Unsigned 64-bit value exceeds safe integer.");
+    }
+
+    float32() {
+        return this._decode(4).getFloat32(0, true);
+    }
+
+    float64() {
+        return this._decode(8).getFloat64(0, true);
+    }
+
+    // Copies the next 'length' bytes (at most 8) into the scratch buffer and returns its view.
+    _decode(length) {
+        this._buffer.set(this._stream.read(length), 0);
+        return this._view;
+    }
+};
+
 base.Telemetry = class {
 
     constructor(window) {
@@ -1114,5 +1210,6 @@ export const Complex64 = base.Complex64;
 export const Complex128 = base.Complex128;
 export const BinaryStream = base.BinaryStream;
 export const BinaryReader = base.BinaryReader;
+export const StreamReader = base.StreamReader;
 export const Telemetry = base.Telemetry;
 export const Metadata = base.Metadata;
diff --git a/source/ggml.js b/source/ggml.js
new file mode 100644
index 00000000000..1656adc0736
--- /dev/null
+++ b/source/ggml.js
@@ -0,0 +1,398 @@
+
+import * as base from './base.js';
+
+const ggml = {};
+const gguf = {};
+
+// Model factory for GGUF files.
+ggml.ModelFactory = class {
+
+    // Returns a gguf.Reader when the stream starts with the 'GGUF' signature, otherwise null.
+    match(context) {
+        return gguf.Reader.open(context.stream);
+    }
+
+    // target is expected to be the gguf.Reader returned by match().
+    async open(context, target) {
+        target.read();
+        return new ggml.Model(target);
+    }
+};
+
+ggml.Model = class {
+
+    constructor(target) {
+        this.format = target.format;
+        this.graphs = [ new ggml.Graph(target) ];
+        // Replace the tokenizer entries with empty strings (presumably to keep large vocabulary data out of the metadata view).
+        const keys = [
+            'tokenizer.ggml.tokens',
+            'tokenizer.ggml.scores',
+            'tokenizer.ggml.token_type',
+            'tokenizer.ggml.merges',
+            'tokenizer.ggml.unknown_token_id',
+            'tokenizer.ggml.padding_token_id',
+            'tokenizer.ggml.add_eos_token',
+            'tokenizer.ggml.add_bos_token'
+        ];
+        for (const key of keys) {
+            target.metadata.set(key, '');
+        }
+        this.metadata = target.metadata;
+    }
+};
+
+ggml.Graph = class {
+
+    constructor(target) {
+        const metadata = target.metadata;
+        this.name = metadata.get('general.name');
+        this.type = metadata.get('general.architecture');
+        this.nodes = [];
+        this.inputs = [];
+        this.outputs = [];
+        // Group tensors by name prefix: key 'a.b.weight' is filed under layer 'a.b' as 'weight'.
+        const layers = new Map();
+        for (const [key, tensor] of target.tensors) {
+            const parts = key.split('.');
+            const name = parts.pop();
+            const layer = parts.join('.');
+            if (!layers.has(layer)) {
+                layers.set(layer, []);
+            }
+            layers.get(layer).push([ name, tensor ]);
+        }
+        for (const [name, weights] of layers) {
+            const node = new ggml.Node(name, weights);
+            this.nodes.push(node);
+        }
+    }
+};
+
+ggml.Argument = class {
+
+    constructor(name, value) {
+        this.name = name;
+        this.value = value;
+    }
+};
+
+ggml.Value = class {
+
+    constructor(name, tensor) {
+        this.name = name;
+        this.type = tensor.type;
+        this.quantization = tensor.quantization;
+        this.initializer = tensor;
+    }
+};
+
+ggml.Node = class {
+
+    // weights: array of [ name, tensor ] pairs as collected by ggml.Graph.
+    constructor(name, weights) {
+        this.type = { name: 'Layer' };
+        this.name = name;
+        this.inputs = [];
+        this.outputs = [];
+        this.attributes = [];
+        for (const [key, weight] of weights) {
+            const tensor = new ggml.Tensor(weight);
+            const value = new ggml.Value(weight.name, tensor);
+            const argument = new ggml.Argument(key, [ value ]);
+            this.inputs.push(argument);
+        }
+    }
+};
+
+ggml.TensorType = class {
+
+    constructor(dataType, shape) {
+        this.dataType = dataType;
+        this.shape = shape;
+    }
+
+    toString() {
+        return (this.dataType || '?') + this.shape.toString();
+    }
+};
+
+ggml.TensorShape = class {
+
+    constructor(dimensions) {
+        this.dimensions = dimensions;
+    }
+
+    toString() {
+        return '[' + this.dimensions.map((dimension) => dimension.toString()).join(',') + ']';
+    }
+};
+
+ggml.Tensor = class {
+
+    constructor(tensor) {
+        const shape = new ggml.TensorShape(tensor.ne);
+        this.type = new ggml.TensorType(tensor.dtype, shape);
+        if (tensor.type !== ggml.QuantizationType.F32 && tensor.type !== ggml.QuantizationType.F16) {
+            this.quantization = ggml.Utility.enum(ggml.QuantizationType, tensor.type);
+        }
+        // Only float32 and float16 tensors retain their data; values is null for all other types.
+        if (tensor.dtype === 'float32' || tensor.dtype === 'float16') {
+            this.encoding = '<';
+            this._data = tensor.data;
+        }
+    }
+
+    get values() {
+        if (this._data) {
+            return this._data.peek();
+        }
+        return null;
+    }
+};
+
+
+gguf.Reader = class {
+
+    // Returns a reader when the stream begins with the 'GGUF' signature, otherwise null.
+    static open(stream) {
+        if (stream && stream.length > 4) {
+            const signature = String.fromCharCode.apply(null, stream.peek(4));
+            if (signature === 'GGUF') {
+                return new gguf.Reader(stream);
+            }
+        }
+        return null;
+    }
+
+    constructor(stream) {
+        this._stream = stream;
+        const QK_K = 256;
+        // Tensor type -> [ elements per block, bytes per block ]; built once and shared by all readers.
+        gguf.Reader.GGML_QUANT_SIZES = gguf.Reader.GGML_QUANT_SIZES || new Map([
+            [ ggml.QuantizationType.F32, [1, 4] ],
+            [ ggml.QuantizationType.F16, [1, 2] ],
+            [ ggml.QuantizationType.Q4_0, [32, 2 + 16] ],
+            [ ggml.QuantizationType.Q4_1, [32, 2 + 2 + 16] ],
+            [ ggml.QuantizationType.Q5_0, [32, 2 + 4 + 16] ],
+            [ ggml.QuantizationType.Q5_1, [32, 2 + 2 + 4 + 16] ],
+            [ ggml.QuantizationType.Q8_0, [32, 2 + 32] ],
+            [ ggml.QuantizationType.Q8_1, [32, 4 + 4 + 32] ],
+            [ ggml.QuantizationType.Q2_K, [256, 2 + 2 + Math.floor(QK_K / 16) + Math.floor(QK_K / 4)] ],
+            [ ggml.QuantizationType.Q3_K, [256, 2 + Math.floor(QK_K / 4) + Math.floor(QK_K / 8) + 12] ],
+            [ ggml.QuantizationType.Q4_K, [256, 2 + 2 + Math.floor(QK_K / 2) + 12] ],
+            [ ggml.QuantizationType.Q5_K, [256, 2 + 2 + Math.floor(QK_K / 2) + Math.floor(QK_K / 8) + 12] ],
+            [ ggml.QuantizationType.Q6_K, [256, 2 + Math.floor(QK_K / 2) + Math.floor(QK_K / 4) + Math.floor(QK_K / 16)] ],
+            [ ggml.QuantizationType.Q8_K, [256, 4 + QK_K + Math.floor(QK_K / 8)] ]
+        ]);
+    }
+
+    // Parses the header, metadata and tensor descriptors, attaches a data stream to
+    // each tensor, then rewinds the stream and drops the reference to it.
+    read() {
+        const reader = new gguf.StreamReader(this._stream);
+        this.tensors = new Map();
+        this.metadata = new Map();
+        const context = {};
+        context.header = {};
+        context.header.magic = String.fromCharCode.apply(null, reader.read(4));
+        context.header.version = reader.uint32();
+        this.format = 'GGUF v' + context.header.version.toString();
+        // Only version 2 and later layouts are parsed; other versions produce empty metadata and tensors.
+        if (context.header.version >= 2) {
+            context.header.n_tensors = reader.uint64();
+            context.header.n_kv = reader.uint64();
+            for (let i = 0; i < context.header.n_kv; i++) {
+                const entry = reader.entry();
+                this.metadata.set(entry.name, entry.value);
+            }
+            for (let i = 0; i < context.header.n_tensors; i++) {
+                const tensor = reader.tensor();
+                switch (tensor.type) {
+                    case ggml.QuantizationType.F32:
+                        tensor.dtype = 'float32';
+                        break;
+                    case ggml.QuantizationType.F16:
+                        tensor.dtype = 'float16';
+                        break;
+                    default:
+                        tensor.dtype = '?';
+                        break;
+                }
+                this.tensors.set(tensor.name, tensor);
+            }
+            // Tensor data begins at the next multiple of general.alignment (32 when absent).
+            context.alignment = this.metadata.get('general.alignment') || 32;
+            const offset_pad = reader.position % context.alignment;
+            if (offset_pad !== 0) {
+                reader.skip(context.alignment - offset_pad);
+            }
+            context.offset = reader.position;
+            if (context.offset < this._stream.length) {
+                for (const tensor of this.tensors.values()) {
+                    if (!gguf.Reader.GGML_QUANT_SIZES.has(tensor.type)) {
+                        throw new ggml.Error("Unsupported tensor type '" + tensor.type + "'.");
+                    }
+                    reader.seek(context.offset + tensor.offset);
+                    const [block_size, type_size] = gguf.Reader.GGML_QUANT_SIZES.get(tensor.type);
+                    const n_elems = tensor.ne.reduce((a, b) => a * b, 1);
+                    const n_bytes = Math.floor(n_elems * type_size / block_size);
+                    tensor.data = reader.stream(n_bytes);
+                }
+            }
+        }
+        this._stream.seek(0);
+        delete this._stream;
+    }
+};
+
+gguf.StreamReader = class extends base.StreamReader {
+
+    constructor(stream) {
+        super(stream);
+        this._decoder = new TextDecoder('utf-8');
+    }
+
+    // Length-prefixed string (64-bit length) decoded as UTF-8; assumes read() returns a typed array.
+    string() {
+        const size = this.uint64();
+        const buffer = this.read(size);
+        return this._decoder.decode(buffer);
+    }
+
+    // Reads one metadata value of the given gguf.Type; arrays are read recursively.
+    value(type) {
+        switch (type) {
+            case gguf.Type.UINT8: {
+                return this.byte();
+            }
+            case gguf.Type.INT8: {
+                return this.int8();
+            }
+            case gguf.Type.UINT16: {
+                return this.uint16();
+            }
+            case gguf.Type.INT16: {
+                return this.int16();
+            }
+            case gguf.Type.UINT32: {
+                return this.uint32();
+            }
+            case gguf.Type.INT32: {
+                return this.int32();
+            }
+            case gguf.Type.FLOAT32: {
+                return this.float32();
+            }
+            case gguf.Type.BOOL: {
+                return this.byte() !== 0;
+            }
+            case gguf.Type.STRING: {
+                return this.string();
+            }
+            case gguf.Type.ARRAY: {
+                const itemType = this.uint32();
+                const size = this.uint64();
+                const value = new Array(size);
+                for (let i = 0; i < size; i++) {
+                    value[i] = this.value(itemType);
+                }
+                return value;
+            }
+            case gguf.Type.UINT64: {
+                return this.uint64();
+            }
+            case gguf.Type.INT64: {
+                return this.int64();
+            }
+            case gguf.Type.FLOAT64: {
+                return this.float64();
+            }
+            default: {
+                throw new ggml.Error("Unsupported GGUF type '" + type + "'.");
+            }
+        }
+    }
+
+    // Metadata entry: key string, 32-bit value type, then the value itself.
+    entry() {
+        const name = this.string();
+        const type = this.uint32();
+        const value = this.value(type);
+        return { name: name, value: value, type: type };
+    }
+
+    // Tensor descriptor: name, dimensions (ne), tensor type and offset relative to the data section.
+    tensor() {
+        const tensor = {};
+        tensor.name = this.string();
+        const n_dims = this.uint32();
+        tensor.ne = new Array(n_dims);
+        for (let i = 0; i < n_dims; i++) {
+            tensor.ne[i] = this.uint64();
+        }
+        tensor.type = this.uint32();
+        tensor.offset = this.uint64();
+        return tensor;
+    }
+};
+
+gguf.Type = {
+    UINT8: 0,
+    INT8: 1,
+    UINT16: 2,
+    INT16: 3,
+    UINT32: 4,
+    INT32: 5,
+    FLOAT32: 6,
+    BOOL: 7,
+    STRING: 8,
+    ARRAY: 9,
+    UINT64: 10,
+    INT64: 11,
+    FLOAT64: 12,
+};
+
+ggml.QuantizationType = {
+    F32: 0,
+    F16: 1,
+    Q4_0: 2,
+    Q4_1: 3,
+    Q5_0: 6,
+    Q5_1: 7,
+    Q8_0: 8,
+    Q8_1: 9,
+    Q2_K: 10,
+    Q3_K: 11,
+    Q4_K: 12,
+    Q5_K: 13,
+    Q6_K: 14,
+    Q8_K: 15
+};
+
+ggml.Utility = class {
+
+    // Returns the key of 'type' whose value equals 'value', or 'value' itself when there is none.
+    static enum(type, value) {
+        ggml.Utility._enums = ggml.Utility._enums || new Map();
+        if (!ggml.Utility._enums.has(type)) {
+            const entries = new Map(Object.entries(type).map(([name, id]) => [ id, name ]));
+            ggml.Utility._enums.set(type, entries);
+        }
+        const names = ggml.Utility._enums.get(type);
+        if (names.has(value)) {
+            return names.get(value);
+        }
+        return value;
+    }
+};
+
+ggml.Error = class extends Error {
+
+    constructor(message) {
+        super(message);
+        this.name = 'GGML Error';
+    }
+};
+
+export const ModelFactory = ggml.ModelFactory;
diff --git a/source/view.js b/source/view.js
index 8695c10264e..c4bbe5c2032 100644
--- a/source/view.js
+++ b/source/view.js
@@ -5210,6 +5210,7 @@ view.ModelFactoryService = class {
         this.register('./imgdnn', [ '.dnn', 'params', '.json' ]);
         this.register('./flax', [ '.msgpack' ]);
         this.register('./om', [ '.om', '.onnx', '.pb', '.engine' ]);
+        this.register('./ggml', [ '.gguf' ]);
         this.register('./nnabla', [ '.nntxt' ], [ '.nnp' ]);
         this.register('./hickle', [ '.h5', '.hkl' ]);
         this.register('./nnef', [ '.nnef', '.dat' ]);
diff --git a/test/models.json b/test/models.json
index 4ae1b669024..42e3650689f 100644
--- a/test/models.json
+++ b/test/models.json
@@ -2010,6 +2010,13 @@
     "format": "Flux",
     "link": "https://github.com/lutzroeder/netron/issues/334"
   },
+  {
+    "type": "ggml",
+    "target": "mixtral-8x7b-v0.1.Q4_K_M.gguf",
+    "source": "https://github.com/lutzroeder/netron/files/13802180/mixtral-8x7b-v0.1.Q4_K_M.gguf.zip[mixtral-8x7b-v0.1.Q4_K_M.gguf]",
+    "format": "GGUF v3",
+    "link": "https://github.com/lutzroeder/netron/issues/1209"
+  },
   {
     "type": "hailo",
     "target": "fcn_hailo_pp_v2.har",
diff --git a/tools/ggml b/tools/ggml
new file mode 100755
index 00000000000..8f197d6cbfa
--- /dev/null
+++ b/tools/ggml
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+# Maintains the llama.cpp checkout under ./third_party/source.
+# Usage: tools/ggml [clean|sync]...
+
+set -e
+# Run from the repository root (the parent of this script's directory).
+pushd "$(cd "$(dirname "${0}")/.."; pwd)" > /dev/null
+
+clean() {
+    echo "ggml clean"
+    rm -rf "./third_party/source/llama.cpp"
+}
+
+sync() {
+    echo "ggml sync"
+    # Clone only when the checkout does not exist yet.
+    [ -d "./third_party/source/llama.cpp" ] || git clone --quiet https://github.com/ggerganov/llama.cpp.git "./third_party/source/llama.cpp"
+}
+
+# Commands are executed in the order given; unrecognized ones are ignored.
+while [ "$#" != 0 ]; do
+    command="$1" && shift
+    case "${command}" in
+        "clean") clean;;
+        "sync") sync;;
+    esac
+done