diff --git a/package.js b/package.js
index 5684a56d5ce..87d736902a4 100644
--- a/package.js
+++ b/package.js
@@ -519,6 +519,7 @@ const update = async () => {
         'coreml',
         'dlc',
         'dnn',
+        'ggml',
         'keras',
         'mnn',
         'mslite',
diff --git a/source/base.js b/source/base.js
index 625bcd4d82e..8bc7081a037 100644
--- a/source/base.js
+++ b/source/base.js
@@ -930,6 +930,82 @@ base.BinaryReader = class {
     }
 };
 
+base.StreamReader = class {
+
+    constructor(stream) {
+        this._stream = stream;
+        this._buffer = new Uint8Array(8);
+        this._view = new DataView(this._buffer.buffer, this._buffer.byteOffset, this._buffer.byteLength);
+    }
+
+    get position() {
+        return this._stream.position;
+    }
+
+    seek(position) {
+        this._stream.seek(position);
+    }
+
+    skip(position) {
+        this._stream.skip(position);
+    }
+
+    stream(length) {
+        return this._stream.stream(length);
+    }
+
+    read(length) {
+        return this._stream.read(length);
+    }
+
+    byte() {
+        return this._stream.byte();
+    }
+
+    int16() {
+        const buffer = this._stream.read(2);
+        this._buffer.set(buffer, 0);
+        return this._view.getInt16(0, true);
+    }
+
+    int32() {
+        const buffer = this._stream.read(4);
+        this._buffer.set(buffer, 0);
+        return this._view.getInt32(0, true);
+    }
+
+    uint16() {
+        const buffer = this._stream.read(2);
+        this._buffer.set(buffer, 0);
+        return this._view.getUint16(0, true);
+    }
+
+    uint32() {
+        const buffer = this._stream.read(4);
+        this._buffer.set(buffer, 0);
+        return this._view.getUint32(0, true);
+    }
+
+    uint64() {
+        const low = this.uint32();
+        const high = this.uint32();
+        if (high === 0) {
+            return low;
+        }
+        const value = (high * 4294967296) + low;
+        if (Number.isSafeInteger(value)) {
+            return value;
+        }
+        throw new Error("Unsigned 64-bit value exceeds safe integer.");
+    }
+
+    float32() {
+        const buffer = this._stream.read(4);
+        this._buffer.set(buffer, 0);
+        return this._view.getFloat32(0, true);
+    }
+};
+
 base.Telemetry = class {
 
     constructor(window) {
@@ -1114,5 +1190,6 @@ export const Complex64 = base.Complex64;
 export const Complex128 = base.Complex128;
 export const BinaryStream = base.BinaryStream;
 export const BinaryReader = base.BinaryReader;
+export const StreamReader = base.StreamReader;
 export const Telemetry = base.Telemetry;
 export const Metadata = base.Metadata;
diff --git a/source/ggml.js b/source/ggml.js
new file mode 100644
index 00000000000..1656adc0736
--- /dev/null
+++ b/source/ggml.js
@@ -0,0 +1,351 @@
+
+import * as base from './base.js';
+
+const ggml = {};
+const gguf = {};
+
+ggml.ModelFactory = class {
+    // Factory pair: match() probes the context stream, open() builds the model.
+    match(context) {
+        return gguf.Reader.open(context.stream);
+    }
+    // NOTE(review): 'target' is presumably the gguf.Reader produced by match() - confirm with the caller.
+    async open(context, target) {
+        target.read();
+        return new ggml.Model(target);
+    }
+};
+
+ggml.Model = class {
+
+    constructor(target) {
+        this.format = target.format;
+        this.graphs = [ new ggml.Graph(target) ];
+        target.metadata.set('tokenizer.ggml.tokens', '');
+        target.metadata.set('tokenizer.ggml.scores', '');
+        target.metadata.set('tokenizer.ggml.token_type', '');
+        target.metadata.set('tokenizer.ggml.merges', '');
+        target.metadata.set('tokenizer.ggml.unknown_token_id', '');
+        target.metadata.set('tokenizer.ggml.padding_token_id', '');
+        target.metadata.set('tokenizer.ggml.add_eos_token', '');
+        target.metadata.set('tokenizer.ggml.add_bos_token', '');
+        // target.metadata.set('tokenizer.ggml.tokens', '...');
+        this.metadata = target.metadata;
+    }
+};
+
+ggml.Graph = class {
+
+    constructor(target) {
+        const metadata = target.metadata;
+        this.name = metadata.get('general.name');
+        this.type = metadata.get('general.architecture');
+        this.nodes = [];
+        this.inputs = [];
+        this.outputs = [];
+        const layers = new Map();
+        for (const [key, tensor] of target.tensors) {
+            const parts = key.split('.');
+            const name = parts.pop();
+            const layer = parts.join('.');
+            if (!layers.has(layer)) {
+                layers.set(layer, []);
+            }
+            layers.get(layer).push([ name, tensor ]);
+        }
+        for (const [name, weights] of layers) {
+            const node = new ggml.Node(name, weights);
+            this.nodes.push(node);
+        }
+    }
+};
+
+ggml.Argument = class {
+
+    constructor(name, value) {
+        this.name = name;
+        this.value = value;
+    }
+};
+
+ggml.Value = class {
+
+    constructor(name, tensor) {
+        this.name = name;
+        this.type = tensor.type;
+        this.quantization = tensor.quantization;
+        this.initializer = tensor;
+    }
+};
+
+ggml.Node = class {
+
+    constructor(name, weights) {
+        this.type = { name: 'Layer' };
+        this.name = name;
+        this.inputs = [];
+        this.outputs = [];
+        this.attributes = [];
+        for (const [name, weight] of weights) {
+            const tensor = new ggml.Tensor(weight);
+            const value = new ggml.Value(weight.name, tensor);
+            const argument = new ggml.Argument(name, [ value ]);
+            this.inputs.push(argument);
+        }
+    }
+};
+
+ggml.TensorType = class {
+
+    constructor(dataType, shape) {
+        this.dataType = dataType;
+        this.shape = shape;
+    }
+
+    toString() {
+        return (this.dataType || '?') + this.shape.toString();
+    }
+};
+
+ggml.TensorShape = class {
+
+    constructor(dimensions) {
+        this.dimensions = dimensions;
+    }
+
+    toString() {
+        return '[' + this.dimensions.map((dimension) => dimension.toString()).join(',') + ']';
+    }
+};
+
+ggml.Tensor = class {
+
+    constructor(tensor) {
+        const shape = new ggml.TensorShape(tensor.ne);
+        this.type = new ggml.TensorType(tensor.dtype, shape);
+        if (tensor.type !== ggml.QuantizationType.F32 && tensor.type !== ggml.QuantizationType.F16) {
+            this.quantization = ggml.Utility.enum(ggml.QuantizationType, tensor.type);
+        }
+        if (tensor.dtype === 'float32' || tensor.dtype === 'float16') {
+            this.encoding = '<';
+            this._data = tensor.data;
+        }
+    }
+
+    get values() {
+        if (this._data) {
+            return this._data.peek();
+        }
+        return null;
+    }
+};
+
+
+gguf.Reader = class {
+
+    static open(stream) {
+        if (stream && stream.length > 4) {
+            const signature = String.fromCharCode.apply(null, stream.peek(4));
+            if (signature === 'GGUF') {
+                return new gguf.Reader(stream);
+            }
+        }
+        return null;
+    }
+
+    constructor(stream) {
+        this._stream = stream;
+        const QK_K = 256;
+        gguf.Reader.GGML_QUANT_SIZES = gguf.Reader.GGML_QUANT_SIZES || new Map([
+            [ ggml.QuantizationType.F32,  [1, 4] ],
+            [ ggml.QuantizationType.F16,  [1, 2] ],
+            [ ggml.QuantizationType.Q4_0, [32, 2 + 16] ],
+            [ ggml.QuantizationType.Q4_1, [32, 2 + 2 + 16] ],
+            [ ggml.QuantizationType.Q5_0, [32, 2 + 4 + 16] ],
+            [ ggml.QuantizationType.Q5_1, [32, 2 + 2 + 4 + 16] ],
+            [ ggml.QuantizationType.Q8_0, [32, 2 + 32] ],
+            [ ggml.QuantizationType.Q8_1, [32, 4 + 4 + 32] ],
+            [ ggml.QuantizationType.Q2_K, [256, 2 + 2 + Math.floor(QK_K / 16) + Math.floor(QK_K / 4)] ],
+            [ ggml.QuantizationType.Q3_K, [256, 2 + Math.floor(QK_K / 4) + Math.floor(QK_K / 8) + 12] ],
+            [ ggml.QuantizationType.Q4_K, [256, 2 + 2 + Math.floor(QK_K / 2) + 12] ],
+            [ ggml.QuantizationType.Q5_K, [256, 2 + 2 + Math.floor(QK_K / 2) + Math.floor(QK_K / 8) + 12] ],
+            [ ggml.QuantizationType.Q6_K, [256, 2 + Math.floor(QK_K / 2) + Math.floor(QK_K / 4) + Math.floor(QK_K / 16)] ],
+            [ ggml.QuantizationType.Q8_K, [256, 4 + QK_K + Math.floor(QK_K / 8)] ]
+        ]);
+    }
+
+    read() {
+        const reader = new gguf.StreamReader(this._stream);
+        this.tensors = new Map();
+        this.metadata = new Map();
+        const context = {};
+        context.header = {};
+        context.header.magic = String.fromCharCode.apply(null, reader.read(4));
+        context.header.version = reader.uint32();
+        this.format = 'GGUF v' + context.header.version.toString();
+        if (context.header.version >= 2) {
+            context.header.n_tensors = reader.uint64();
+            context.header.n_kv = reader.uint64();
+            for (let i = 0; i < context.header.n_kv; i++) {
+                const entry = reader.entry();
+                this.metadata.set(entry.name, entry.value);
+            }
+            for (let i = 0; i < context.header.n_tensors; i++) {
+                const tensor = reader.tensor();
+                switch (tensor.type) {
+                    case ggml.QuantizationType.F32:
+                        tensor.dtype = 'float32';
+                        break;
+                    case ggml.QuantizationType.F16:
+                        tensor.dtype = 'float16';
+                        break;
+                    default:
+                        tensor.dtype = '?';
+                        break;
+                }
+                this.tensors.set(tensor.name, tensor);
+            }
+            context.alignment = this.metadata.get('general.alignment') || 32;
+            const offset_pad = reader.position % context.alignment;
+            if (offset_pad != 0) {
+                reader.skip(context.alignment - offset_pad);
+            }
+            context.offset = reader.position;
+            if (context.offset < this._stream.length) {
+                for (const tensor of this.tensors.values()) {
+                    reader.seek(context.offset + tensor.offset);
+                    const [block_size, type_size] = gguf.Reader.GGML_QUANT_SIZES.get(tensor.type);
+                    const n_elems = tensor.ne.reduce((a, b) => a * b, 1);
+                    const n_bytes = Math.floor(n_elems * type_size / block_size);
+                    tensor.data = reader.stream(n_bytes);
+                }
+            }
+        }
+        this._stream.seek(0);
+        delete this._stream;
+    }
+};
+
+gguf.StreamReader = class extends base.StreamReader {
+
+    constructor(stream) {
+        super(stream);
+    }
+
+    string() {
+        const size = this.uint64();
+        const buffer = this.read(size);
+        return String.fromCharCode.apply(null, buffer);
+    }
+
+    value(type) {
+        switch (type) {
+            case gguf.Type.UINT32: {
+                return this.uint32();
+            }
+            case gguf.Type.INT32: {
+                return this.int32();
+            }
+            case gguf.Type.FLOAT32: {
+                return this.float32();
+            }
+            case gguf.Type.BOOL: {
+                return this.byte() !== 0;
+            }
+            case gguf.Type.STRING: {
+                return this.string();
+            }
+            case gguf.Type.ARRAY: {
+                const type = this.uint32();
+                const size = this.uint64();
+                const value = new Array(size);
+                for (let i = 0; i < size; i++) {
+                    value[i] = this.value(type);
+                }
+                return value;
+            }
+            default: {
+                throw new ggml.Error("Unsupported GGUF type '" + type + "'.");
+            }
+        }
+    }
+
+    entry() {
+        const name = this.string();
+        const type = this.uint32();
+        const value = this.value(type);
+        return { name: name, value: value, type: type };
+    }
+
+    tensor() {
+        const tensor = {};
+        tensor.name = this.string();
+        const n_dims = this.uint32();
+        tensor.ne = new Array(n_dims);
+        for (let i = 0; i < n_dims; i++) {
+            tensor.ne[i] = this.uint64();
+        }
+        tensor.type = this.uint32();
+        tensor.offset = this.uint64();
+        return tensor;
+    }
+};
+
+gguf.Type = { // numeric ids of metadata value types, as dispatched by gguf.StreamReader.value()
+    UINT8: 0,
+    INT8: 1,
+    UINT16: 2,
+    INT16: 3,
+    UINT32: 4,
+    INT32: 5,
+    FLOAT32: 6,
+    BOOL: 7,
+    STRING: 8,
+    ARRAY: 9,
+    UINT64: 10,
+    INT64: 11,
+    FLOAT64: 12,
+};
+
+ggml.QuantizationType = { // tensor type ids; also the keys of gguf.Reader.GGML_QUANT_SIZES
+    F32: 0,
+    F16: 1,
+    Q4_0: 2,
+    Q4_1: 3,
+    Q5_0: 6, // ids 4 and 5 have no entry in this table
+    Q5_1: 7,
+    Q8_0: 8,
+    Q8_1: 9,
+    Q2_K: 10,
+    Q3_K: 11,
+    Q4_K: 12,
+    Q5_K: 13,
+    Q6_K: 14,
+    Q8_K: 15
+};
+
+ggml.Utility = class {
+
+    static enum(type, value) {
+        ggml.Utility._enums = ggml.Utility._enums || new Map();
+        if (!ggml.Utility._enums.has(type)) {
+            const entries = new Map(Object.entries(type).map(([key, value]) => [ value, key ]));
+            ggml.Utility._enums.set(type, entries);
+        }
+        const entires = ggml.Utility._enums.get(type);
+        if (entires.has(value)) {
+            return entires.get(value);
+        }
+        return value;
+    }
+};
+
+ggml.Error = class extends Error {
+    // Error type thrown by this module; reports itself under the name 'GGML Error'.
+    constructor(message) {
+        super(message);
+        this.name = 'GGML Error';
+    }
+};
+
+export const ModelFactory = ggml.ModelFactory;
diff --git a/source/view.js b/source/view.js
index 8695c10264e..c4bbe5c2032 100644
--- a/source/view.js
+++ b/source/view.js
@@ -5210,6 +5210,7 @@ view.ModelFactoryService = class {
         this.register('./imgdnn', [ '.dnn', 'params', '.json' ]);
         this.register('./flax', [ '.msgpack' ]);
         this.register('./om', [ '.om', '.onnx', '.pb', '.engine' ]);
+        this.register('./ggml', [ '.gguf' ]);
         this.register('./nnabla', [ '.nntxt' ], [ '.nnp' ]);
         this.register('./hickle', [ '.h5', '.hkl' ]);
         this.register('./nnef', [ '.nnef', '.dat' ]);
diff --git a/test/models.json b/test/models.json
index 4ae1b669024..42e3650689f 100644
--- a/test/models.json
+++ b/test/models.json
@@ -2010,6 +2010,13 @@
     "format":   "Flux",
     "link":     "https://github.com/lutzroeder/netron/issues/334"
   },
+  {
+    "type":     "ggml",
+    "target":   "mixtral-8x7b-v0.1.Q4_K_M.gguf",
+    "source":   "https://github.com/lutzroeder/netron/files/13802180/mixtral-8x7b-v0.1.Q4_K_M.gguf.zip[mixtral-8x7b-v0.1.Q4_K_M.gguf]",
+    "format":   "GGUF v3",
+    "link":     "https://github.com/lutzroeder/netron/issues/1209"
+  },
   {
     "type":     "hailo",
     "target":   "fcn_hailo_pp_v2.har",
diff --git a/tools/ggml b/tools/ggml
new file mode 100755
index 00000000000..8f197d6cbfa
--- /dev/null
+++ b/tools/ggml
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+set -e
+pushd "$(cd "$(dirname "${0}")/.."; pwd)" > /dev/null # quoted so a checkout path containing spaces still works
+
+clean() { # removes the llama.cpp checkout created by sync
+    echo "ggml clean"
+    rm -rf "./third_party/source/llama.cpp"
+}
+
+sync() {
+    echo "ggml sync"
+    [ -d "./third_party/source/llama.cpp" ] || git clone --quiet https://github.com/ggerganov/llama.cpp.git "./third_party/source/llama.cpp"
+}
+
+while [ "$#" -gt 0 ]; do
+    command="$1"; shift
+    case "${command}" in
+        clean) clean;;
+        sync) sync;;
+    esac # arguments other than clean/sync match no pattern and are ignored
+done