convert : fix python 3.8 support, modernize type annotations (ggerganov#2916)

* convert : fix python 3.8 support

* convert : sort imports

* convert : fix required parameters in convert-llama-ggmlv3-to-gguf

* convert : fix mypy errors in convert-llama-ggmlv3-to-gguf

* convert : use PEP 585 generics and PEP 604 unions

Now that we have `from __future__ import annotations`, we can use this
modern syntax even on Python 3.7, instead of restricting support to
Python 3.9 (where PEP 585 generics became subscriptable) or Python 3.10
(which introduced PEP 604 unions). See the sketch just before the file
diffs below.

* gguf.py : a tuple is already a tuple

* add mypy.ini

* convert : add necessary `type: ignore` comments

* gguf-py: bump version
cebtenzzre authored Aug 31, 2023
1 parent 8afe228 commit 92d0b75
Showing 10 changed files with 188 additions and 163 deletions.
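
Before the per-file diffs, a minimal sketch of the pattern the commit message describes (illustrative only, not code from this commit): with `from __future__ import annotations`, every annotation is stored as a string and never evaluated at runtime, so PEP 585 built-in generics and PEP 604 unions are legal source even on Python 3.7/3.8.

#!/usr/bin/env python3
# Illustrative sketch (hypothetical module, not from this commit).
# With postponed evaluation of annotations, PEP 585 generics such as
# dict[str, Any] and PEP 604 unions such as str | None are never
# evaluated at runtime, so this file runs on Python 3.7 and 3.8.
from __future__ import annotations

from typing import Any


def first_match(table: dict[str, Any], keys: list[str]) -> str | None:
    # Return the stringified value of the first key present, else None.
    for key in keys:
        if key in table:
            return str(table[key])
    return None


print(first_match({"a": 1}, ["b", "a"]))  # prints: 1

Without the future import, `dict[str, Any]` raises `TypeError` when evaluated on Python 3.8, and `str | None` raises on anything below 3.10; string-only annotations sidestep both.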
23 changes: 13 additions & 10 deletions convert-falcon-hf-to-gguf.py
@@ -1,18 +1,21 @@
 #!/usr/bin/env python3
 # HF falcon--> gguf conversion
 
-import gguf
+from __future__ import annotations
+
+import argparse
+import json
 import os
-import sys
 import struct
-import json
+import sys
+from pathlib import Path
+from typing import Any
+
+import gguf
 import numpy as np
 import torch
-import argparse
+from transformers import AutoTokenizer  # type: ignore[import]
 
-from typing import Any, List
-from pathlib import Path
-from transformers import AutoTokenizer
 
 def bytes_to_unicode():
     # ref: https://github.com/openai/gpt-2/blob/master/src/encoder.py
@@ -114,9 +117,9 @@ def parse_args() -> argparse.Namespace:
 
 print("gguf: get tokenizer metadata")
 
-tokens: List[bytearray] = []
-scores: List[float] = []
-toktypes: List[int] = []
+tokens: list[bytearray] = []
+scores: list[float] = []
+toktypes: list[int] = []
 
 tokenizer_json_file = dir_model / 'tokenizer.json'
 if not tokenizer_json_file.is_file():
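The `# type: ignore[import]` comment above silences mypy's missing-stubs error for `transformers` on that line alone. The commit message also mentions adding a mypy.ini, which this page does not show; a minimal configuration in the same spirit might look like the following sketch (assumed contents, not the actual file from the commit):

; mypy.ini -- illustrative sketch only; the real file added by this
; commit is not shown on this page.
[mypy]
strict = true

; An alternative to per-line "# type: ignore[import]" comments is to
; ignore missing stubs per package, e.g.:
[mypy-torch.*]
ignore_missing_imports = true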
20 changes: 11 additions & 9 deletions convert-gptneox-hf-to-gguf.py
@@ -1,18 +1,20 @@
 #!/usr/bin/env python3
 # HF gptneox--> gguf conversion
 
-import gguf
+from __future__ import annotations
+
+import argparse
+import json
 import os
-import sys
 import struct
-import json
+import sys
+from pathlib import Path
+from typing import Any
+
+import gguf
 import numpy as np
 import torch
-import argparse
 
-from typing import Any, List
-from pathlib import Path
-from transformers import AutoTokenizer
+from transformers import AutoTokenizer  # type: ignore[import]
 
 # ref: https://github.com/openai/gpt-2/blob/master/src/encoder.py
 
@@ -112,7 +114,7 @@ def parse_args() -> argparse.Namespace:
 
 print("gguf: get tokenizer metadata")
 
-tokens: List[bytearray] = []
+tokens: list[bytearray] = []
 
 tokenizer_json_file = dir_model / 'tokenizer.json'
 if not tokenizer_json_file.is_file():
29 changes: 16 additions & 13 deletions convert-llama-7b-pth-to-gguf.py
@@ -3,22 +3,25 @@
 # Only models with a single datafile are supported, like 7B
 # HF files required in the model dir: config.json tokenizer_config.json tokenizer.json tokenizer.model
 
-import gguf
+from __future__ import annotations
+
+import argparse
+import json
 import os
-import sys
 import struct
-import json
+import sys
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+import gguf
 import numpy as np
 import torch
-import argparse
+from sentencepiece import SentencePieceProcessor  # type: ignore[import]
 
-from typing import Any, List, TypeAlias
-from pathlib import Path
-from sentencepiece import SentencePieceProcessor
+if TYPE_CHECKING:
+    from typing import TypeAlias
 
 #NDArray = np.ndarray[Any, Any]
-# compatible with python < 3.9
-NDArray: 'TypeAlias' = 'np.ndarray[Any, Any]'
+NDArray: TypeAlias = 'np.ndarray[Any, Any]'
 
 
 def count_model_parts(dir_model: Path) -> int:
@@ -129,9 +132,9 @@ def parse_args() -> argparse.Namespace:
 
 print("gguf: get tokenizer metadata")
 
-tokens: List[bytes] = []
-scores: List[float] = []
-toktypes: List[int] = []
+tokens: list[bytes] = []
+scores: list[float] = []
+toktypes: list[int] = []
 
 tokenizer_model_file = dir_model / 'tokenizer.model'
 if not tokenizer_model_file.is_file():
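The `if TYPE_CHECKING:` guard in the hunk above is the standard way to use `typing.TypeAlias` (new in Python 3.10) without importing it at runtime: type checkers evaluate the guarded import, the interpreter never does. A self-contained sketch of the same pattern (hypothetical module, not repo code):

from __future__ import annotations

from typing import TYPE_CHECKING, Any

import numpy as np

if TYPE_CHECKING:
    # Seen only by static type checkers; skipped at runtime, so Python
    # 3.7/3.8 (which lack typing.TypeAlias) still import this module.
    from typing import TypeAlias

# The annotation is stored as a string and never evaluated, so TypeAlias
# being undefined at runtime is harmless.
NDArray: TypeAlias = 'np.ndarray[Any, Any]'


def zeros(n: int) -> NDArray:
    return np.zeros(n)


print(zeros(3))  # [0. 0. 0.]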
18 changes: 11 additions & 7 deletions convert-llama-ggmlv3-to-gguf.py
@@ -1,10 +1,14 @@
 #!/usr/bin/env python3
-import sys, struct, math, argparse
-from pathlib import Path
+from __future__ import annotations
 
-import numpy as np
+import argparse
+import math
+import struct
+import sys
+from pathlib import Path
 
 import gguf
+import numpy as np
 
 # Note: Does not support GGML_QKK_64
 QK_K = 256
@@ -72,7 +76,7 @@ def load(self, data, offset, n_vocab):
 class Tensor:
     def __init__(self):
         self.name = None
-        self.dims = ()
+        self.dims: tuple[int, ...] = ()
         self.dtype = None
         self.start_offset = 0
         self.len_bytes = np.int64(0)
@@ -119,7 +123,7 @@ def load(self, data, offset):
         offset += hp.load(data, offset)
         vocab = Vocab()
         offset += vocab.load(data, offset, hp.n_vocab)
-        tensors = []
+        tensors: list[Tensor] = []
         tensor_map = {}
         while offset < len(data):
             tensor = Tensor()
@@ -305,8 +309,8 @@ def handle_metadata(cfg, hp):
 
 def handle_args():
     parser = argparse.ArgumentParser(description = 'Convert GGMLv3 models to GGUF')
-    parser.add_argument('--input', '-i', type = Path, help = 'Input GGMLv3 filename')
-    parser.add_argument('--output', '-o', type = Path, help ='Output GGUF filename')
+    parser.add_argument('--input', '-i', type = Path, required = True, help = 'Input GGMLv3 filename')
+    parser.add_argument('--output', '-o', type = Path, required = True, help ='Output GGUF filename')
     parser.add_argument('--name', help = 'Set model name')
     parser.add_argument('--desc', help = 'Set model description')
     parser.add_argument('--gqa', type = int, default = 1, help = 'grouped-query attention factor (use 8 for LLaMA2 70B)')
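For context on the `required = True` change above (a sketch, not repo code): argparse treats all `--flag` options as optional by default, so a missing `--input` previously left `args.input` as `None` and the script failed later with an unrelated-looking error; `required = True` turns it into an immediate usage message.

import argparse
from pathlib import Path

parser = argparse.ArgumentParser(description='demo of required options')
parser.add_argument('--input', '-i', type=Path, required=True,
                    help='Input filename')

# Run with no arguments and argparse exits immediately with:
#   error: the following arguments are required: --input/-i
# Without required=True it would return Namespace(input=None), and the
# None would only blow up later, e.g. when the file is opened.
args = parser.parse_args()
print(args.input)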
31 changes: 17 additions & 14 deletions convert-llama-hf-to-gguf.py
@@ -1,28 +1,31 @@
 #!/usr/bin/env python3
 # HF llama --> gguf conversion
 
-import gguf
+from __future__ import annotations
+
+import argparse
+import json
 import os
-import sys
 import struct
-import json
+import sys
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+import gguf
 import numpy as np
 import torch
-import argparse
+from sentencepiece import SentencePieceProcessor  # type: ignore[import]
 
-from typing import Any, List, Optional, TypeAlias
-from pathlib import Path
-from sentencepiece import SentencePieceProcessor
+if TYPE_CHECKING:
+    from typing import TypeAlias
 
 #NDArray = np.ndarray[Any, Any]
-# compatible with python < 3.9
-NDArray: 'TypeAlias' = 'np.ndarray[Any, Any]'
+NDArray: TypeAlias = 'np.ndarray[Any, Any]'
 
 # reverse HF permute back to original pth layout
 # https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/convert_llama_weights_to_hf.py
 
 
-def reverse_hf_permute(weights: NDArray, n_head: int, n_kv_head: Optional[int] = None) -> NDArray:
+def reverse_hf_permute(weights: NDArray, n_head: int, n_kv_head: int | None = None) -> NDArray:
     if n_kv_head is not None and n_head != n_kv_head:
         n_head //= n_kv_head
 
@@ -136,9 +139,9 @@ def parse_args() -> argparse.Namespace:
 
 print("gguf: get tokenizer metadata")
 
-tokens: List[bytes] = []
-scores: List[float] = []
-toktypes: List[int] = []
+tokens: list[bytes] = []
+scores: list[float] = []
+toktypes: list[int] = []
 
 tokenizer_model_file = dir_model / 'tokenizer.model'
 if not tokenizer_model_file.is_file():
8 changes: 5 additions & 3 deletions convert-lora-to-ggml.py
@@ -1,15 +1,17 @@
 #!/usr/bin/env python3
+from __future__ import annotations
+
 import json
 import os
 import re
 import struct
 import sys
-from typing import Any, Dict, Sequence, BinaryIO
+from typing import Any, BinaryIO, Sequence
 
 import numpy as np
 import torch
 
-NUMPY_TYPE_TO_FTYPE: Dict[str, int] = {"float32": 0, "float16": 1}
+NUMPY_TYPE_TO_FTYPE: dict[str, int] = {"float32": 0, "float16": 1}
 
 
 HF_SUBLAYER_TO_GGML = {
@@ -46,7 +48,7 @@ def translate_tensor_name(t: str) -> str:
         sys.exit(1)
 
 
-def write_file_header(fout: BinaryIO, params: Dict[str, Any]) -> None:
+def write_file_header(fout: BinaryIO, params: dict[str, Any]) -> None:
     fout.write(b"ggla"[::-1])  # magic (ggml lora)
     fout.write(struct.pack("i", 1))  # file version
     fout.write(struct.pack("i", params["r"]))
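One note on the header code in this last hunk (an illustrative check, not repo code): the magic is written as `b"ggla"[::-1]`, i.e. the bytes `algg` on disk, so a reader that unpacks the first four bytes as a little-endian uint32 sees the value that spells "ggla" in big-endian byte order.

import struct

on_disk = b"ggla"[::-1]                  # b"algg", as write_file_header emits
(magic,) = struct.unpack("<I", on_disk)  # little-endian uint32: 0x67676c61
assert magic == int.from_bytes(b"ggla", "big")
print(hex(magic))  # 0x67676c61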