update new version 1.1.2
shibing624 committed Nov 2, 2023
1 parent 20bcf44 commit cba05f3
Showing 13 changed files with 146 additions and 188 deletions.
4 changes: 3 additions & 1 deletion README.md
@@ -23,6 +23,8 @@

## 🔥 News

[2023/11/02] v1.1.2: GPT models now support [NEFTune](https://github.com/neelsjain/NEFTune), an SFT training method that adds noise to the embeddings. Enable NEFTune during SFT with the `--neft_alpha` argument, e.g. `--neft_alpha 5`. See [Release-v1.1.2](https://github.com/shibing624/textgen/releases/tag/1.1.2) for details.
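
A minimal PyTorch sketch of what NEFTune does (illustrative only, not the exact code path that `--neft_alpha` triggers inside textgen): during SFT, uniform noise scaled by `alpha / sqrt(seq_len * hidden_dim)` is added to the token embeddings, and nothing changes at inference time.

```python
import torch


def neftune_noise(embeddings: torch.Tensor, neft_alpha: float = 5.0) -> torch.Tensor:
    """Add NEFTune-style uniform noise to a (batch, seq_len, hidden_dim) embedding tensor."""
    seq_len, hidden_dim = embeddings.size(1), embeddings.size(2)
    scale = neft_alpha / (seq_len * hidden_dim) ** 0.5
    noise = torch.empty_like(embeddings).uniform_(-scale, scale)
    return embeddings + noise  # apply during training only; skip at inference
```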

[2023/09/05] v1.1.1: Added multi-GPU inference, doubling inference speed; calling the textgen library for batch inference makes multi-GPU inference more convenient and faster. See [Release-v1.1.1](https://github.com/shibing624/textgen/releases/tag/1.1.1) for details.

[2023/08/23] v1.1.0: Released the Chinese-English Vicuna-13B model [shibing624/vicuna-baichuan-13b-chat](https://huggingface.co/shibing624/vicuna-baichuan-13b-chat), fine-tuned on the ShareGPT4 dataset, along with the corresponding LoRA model [shibing624/vicuna-baichuan-13b-chat-lora](https://huggingface.co/shibing624/vicuna-baichuan-13b-chat-lora). It supports multi-turn dialogue and shows improved evaluation results. See [Release-v1.1.0](https://github.com/shibing624/textgen/releases/tag/1.1.0) for details.
@@ -36,7 +38,7 @@

## 😊 Feature

- [GPT](textgen/gpt): PyTorch implementation of LoRA fine-tuning and inference for GPT-style models such as ChatGLM-6B, Baichuan, LLaMA2, and BLOOM; usable for dialogue generation and domain-specific fine-tuning
- [GPT](textgen/gpt): PyTorch implementation of LoRA fine-tuning and inference for GPT-style models such as ChatGLM-6B 1/2/3, Baichuan 1/2, LLaMA 1/2, BLOOM, Mistral, and QWen; usable for dialogue generation and domain-specific fine-tuning (see the usage sketch after this list)
- [UDA/EDA](textgen/augment/word_level_augment.py): Implements the UDA (non-core word replacement), EDA, and Back Translation algorithms; replaces unimportant words in a sentence with synonyms based on TF-IDF, and applies random word insertion, deletion, and replacement to produce new text for data augmentation
- [Seq2Seq](textgen/seq2seq): PyTorch implementation of training and inference for Seq2Seq, ConvSeq2Seq, and BART models; usable for text generation tasks such as translation, dialogue generation, and summarization
- [T5](textgen/t5): PyTorch implementation of training and inference for T5 and CopyT5 models; usable for text generation tasks such as translation, dialogue generation, couplet generation, and copywriting
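
A minimal usage sketch for the GPT module, mirroring the `GptModel` calls that appear in `tests/test_llama.py` later in this diff (the base checkpoint and LoRA adapter here are only examples):

```python
from textgen import GptModel

# Example checkpoints; swap in any supported base model / LoRA adapter.
model = GptModel('llama', "decapoda-research/llama-7b-hf",
                 peft_name='ziqingyang/chinese-alpaca-lora-7b',
                 args={'use_peft': True})
print(model.predict(["失眠怎么办?"]))  # batch prediction
print(model.chat("你好", history=[]))   # chat; as of v1.1.2 this returns only the response string
```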
50 changes: 35 additions & 15 deletions examples/gpt/inference_demo.py
@@ -22,11 +22,9 @@ def main():
parser.add_argument('--prompt_template_name', default="vicuna", type=str,
help="Prompt template name, eg: alpaca, vicuna, baichuan-chat, chatglm2 etc.")
parser.add_argument('--interactive', action='store_true', help="run in the instruction mode")
parser.add_argument('--single_round', action='store_true',
help="Whether to generate single round dialogue, default is multi-round dialogue")
parser.add_argument('--data_file', default=None, type=str,
help="A file that contains instructions (one instruction per line)")
parser.add_argument('--predictions_file', default='./predictions_result.jsonl', type=str)
parser.add_argument('--output_file', default='./predictions_result.jsonl', type=str)
parser.add_argument('--batch_size', default=8, type=int, help='Batch size')
args = parser.parse_args()
print(args)
@@ -49,19 +47,41 @@ def main():
for example in examples[:10]:
print(example)
if args.interactive:
print(f"Start inference with interactive mode. enable multi round: {not args.single_round}")
print(f"Start inference with interactive mode.")
history = []
while True:
raw_input_text = input("Input:")
if len(raw_input_text.strip()) == 0:
try:
query = input("Input:")
except UnicodeDecodeError:
print("Detected decoding error at the inputs, please try again.")
continue
except Exception:
raise
if query == "":
print("Please input text, try again.")
continue
if query.strip() == "clear":
history = []
print("history cleared.")
continue
if query.strip() == 'exit':
break
if args.single_round:
response = model.predict([raw_input_text], prompt_template_name=args.prompt_template_name)[0]
else:
response, history = model.chat(
raw_input_text, history=history, prompt_template_name=args.prompt_template_name)
print("Response: ", response)
print("\n")
print("Response:", end='', flush=True)
try:
response = ""
for new_token in model.chat(
query,
history=history,
prompt_template_name=args.prompt_template_name,
stream=True
):
print(new_token, end='', flush=True)
response += new_token
history = history + [[query, response]]
except KeyboardInterrupt:
print("KeyboardInterrupt detected, stop.")
continue
print()
else:
print("Start inference.")
results = []
@@ -75,11 +95,11 @@ def main():
print(f"Input: {example}\n")
print(f"Output: {response}\n")
results.append({"Input": example, "Output": response})
with open(args.predictions_file, 'w', encoding='utf-8') as f:
with open(args.output_file, 'w', encoding='utf-8') as f:
for entry in results:
json.dump(entry, f, ensure_ascii=False)
f.write('\n')
print(f'save to {args.predictions_file}, size: {len(results)}')
print(f'save to {args.output_file}, size: {len(results)}')


if __name__ == '__main__':
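The interactive branch above now streams tokens from `model.chat(..., stream=True)` instead of waiting for a full reply, and the caller keeps the conversation history itself. Consumed outside the demo, the generator looks roughly like this (a sketch inferred from the diff above, not a separately documented API):

```python
from textgen import GptModel

model = GptModel('llama', "decapoda-research/llama-7b-hf", args={'use_peft': False})

history = []
query = "简单介绍下北京"
response = ""
for new_token in model.chat(query, history=history,
                            prompt_template_name="vicuna", stream=True):
    print(new_token, end="", flush=True)  # print tokens as they arrive
    response += new_token
history.append([query, response])         # carry this turn into the next query
```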
9 changes: 6 additions & 3 deletions examples/gpt/training_chatglm_demo.py
@@ -67,10 +67,13 @@ def main():
print(response)

# Chat model with multi turns conversation
response, history = model.chat('请问1加2等于多少?')
history = []
query = "简单介绍下北京"
response = model.chat(query, history=history)
print(response)
history.append([query, response])
response = model.chat('继续', history=history)
print(response)
response, history = model.chat('两数相乘呢?', history=history)
print(response, history)


if __name__ == '__main__':
9 changes: 6 additions & 3 deletions examples/gpt/training_llama_demo.py
@@ -67,10 +67,13 @@ def main():
print(response)

# Chat model with multi turns conversation
response, history = model.chat('请问1加2等于多少?')
history = []
query = "简单介绍下北京"
response = model.chat(query, history=history)
print(response)
history.append([query, response])
response = model.chat('继续', history=history)
print(response)
response, history = model.chat('两数相乘呢?', history=history)
print(response, history)


if __name__ == '__main__':
5 changes: 2 additions & 3 deletions examples/seq2seq/training_seq2seq_model_demo.py
@@ -4,11 +4,10 @@
@description:
"""
import argparse
import pandas as pd
from loguru import logger
import os
import sys

from loguru import logger

sys.path.append('../..')
from textgen.seq2seq import Seq2SeqModel

6 changes: 3 additions & 3 deletions tests/test_benchmark.py
@@ -10,7 +10,7 @@
import pandas as pd

sys.path.append('..')
from textgen import GptModel, ChatGlmModel
from textgen import GptModel

pwd_path = os.path.abspath(os.path.dirname(__file__))

@@ -76,7 +76,7 @@ def test_llama_13b_alpaca_plus():


def test_chatglm_6b():
m = ChatGlmModel('chatglm', "THUDM/chatglm-6b", peft_name=None, args={'use_peft': False})
m = GptModel('chatglm', "THUDM/chatglm-6b", peft_name=None, args={'use_peft': False})
predict_sentences = [get_chatglm_prompt(s) for s in sentences]
res = m.predict(predict_sentences)
for s, i in zip(sentences, res):
@@ -91,7 +91,7 @@ def test_chatglm_6b():


def test_chatglm_6b_lora():
m = ChatGlmModel('chatglm', "THUDM/chatglm-6b", peft_name='shibing624/chatglm-6b-belle-zh-lora',
m = GptModel('chatglm', "THUDM/chatglm-6b", peft_name='shibing624/chatglm-6b-belle-zh-lora',
args={'use_peft': True}, )
predict_sentences = [get_chatglm_prompt(s) for s in sentences]
res = m.predict(predict_sentences)
29 changes: 6 additions & 23 deletions tests/test_chatglm.py
@@ -5,15 +5,14 @@
"""

import sys
import pytest

sys.path.append('..')
from textgen import ChatGlmArgs, ChatGlmModel
from textgen import GptModel


def test_csc():
from pycorrector.utils import eval
model = ChatGlmModel(
model = GptModel(
'chatglm', "THUDM/chatglm-6b", peft_name="shibing624/chatglm-6b-csc-zh-lora",
args={'use_peft': True, 'eval_batch_size': 8, "max_length": 128}
)
@@ -35,27 +34,11 @@ def batch_correct(sentences):


def test_origin():
m = ChatGlmModel('chatglm', "THUDM/chatglm-6b", args={'use_peft': False})
response, history = m.chat("你好", history=[])
m = GptModel('chatglm', "THUDM/chatglm-6b", args={'use_peft': False})
response = m.chat("你好", history=[])
print(response)
assert len(response) > 0
response, history = m.chat("晚上睡不着应该怎么办", history=history)
print(response)
assert len(response) > 0


def test_origin_int4():
m = ChatGlmModel('chatglm', "THUDM/chatglm-6b-int4", args={'use_peft': False, "quantization_bit": None},
cuda_device=0)
response, history = m.chat("你好", history=[], max_length=20)
print(response)
assert len(response) > 0


def test_origin_int4_cpu():
m = ChatGlmModel('chatglm', "THUDM/chatglm-6b-int4", use_cuda=False,
args={'use_peft': False, "quantization_bit": None},
cuda_device=0)
response, history = m.chat("你好", history=[], max_length=20)
history = ["你好", response]
response = m.chat("晚上睡不着应该怎么办", history=history)
print(response)
assert len(response) > 0
7 changes: 4 additions & 3 deletions tests/test_chatglm_training.py
@@ -4,12 +4,13 @@
@description:
"""
import sys
import os
import pytest
from torch.utils.data import Dataset
from datasets import load_dataset, load_from_disk

sys.path.append('..')
from textgen import ChatGlmModel
from textgen import GptModel


def preprocess_batch_for_hf_dataset(example, tokenizer, args):
@@ -51,7 +52,7 @@ def __getitem__(self, index):


def test_train_name():
model = ChatGlmModel(
model = GptModel(
"chatglm", "THUDM/chatglm-6b",
args={
"dataset_class": MyDataset,
@@ -74,7 +75,7 @@ def test_train_name():


def test_second_predict():
model = ChatGlmModel("chatglm", "THUDM/chatglm-6b",
model = GptModel("chatglm", "THUDM/chatglm-6b",
args={"use_peft": True}, peft_name='tmp_outputs')
# load model from peft_name is equal to load model from output_dir
sents = ['我要开一家美妆店,帮我起一个店铺名\n答:']
4 changes: 2 additions & 2 deletions tests/test_dataset.py
@@ -11,7 +11,7 @@
from transformers import AutoTokenizer

sys.path.append('..')
from textgen.llama.llama_utils import LlamaPretrainingDataset
from textgen import GptSupervisedDataset
from textgen import GptArgs


@@ -29,7 +29,7 @@ def test_data():
train_data = load_data('../examples/data/pt.txt')
train_df = pd.DataFrame(train_data, columns=["text"])
eval_df = train_df[:10]
ds = LlamaPretrainingDataset(
ds = GptSupervisedDataset(
tokenizer,
args,
train_df,
73 changes: 1 addition & 72 deletions tests/test_llama.py
@@ -38,77 +38,6 @@ def test_origin_7b():
r = m.predict([predict_sentence])
print(r)
assert len(r) > 0
response, history = m.chat("你好", history=[])
response = m.chat("你好", history=[])
print(response)
assert len(response) > 0
response, history = m.chat("晚上睡不着应该怎么办", history=history)
print(response)
assert len(response) > 0

predict_sentences = [generate_prompt(s) for s in sents]
res = m.predict(predict_sentences)
for s, i in zip(sents, res):
print(s, i)
print()


def test_lora_7b():
m = GptModel('llama', "decapoda-research/llama-7b-hf", peft_name='ziqingyang/chinese-alpaca-lora-7b',
args={'use_peft': True}, )
predict_sentence = generate_prompt("失眠怎么办?")
r = m.predict([predict_sentence])
print(r)
assert len(r) > 0
response, history = m.chat("你好", history=[])
print(response)
assert len(response) > 0
response, history = m.chat("晚上睡不着应该怎么办", history=history)
print(response)
assert len(response) > 0

predict_sentences = [generate_prompt(s) for s in sents]
res = m.predict(predict_sentences)
for s, i in zip(sents, res):
print(s, i)
print()


def test_origin_13b():
m = GptModel('llama', "decapoda-research/llama-13b-hf", args={'use_peft': False})
predict_sentence = generate_prompt("失眠怎么办?")
r = m.predict([predict_sentence])
print(r)
assert len(r) > 0
response, history = m.chat("你好", history=[])
print(response)
assert len(response) > 0
response, history = m.chat("晚上睡不着应该怎么办", history=history)
print(response)
assert len(response) > 0

predict_sentences = [generate_prompt(s) for s in sents]
res = m.predict(predict_sentences)
for s, i in zip(sents, res):
print(s, i)
print()


def test_lora_13b():
m = GptModel('llama', "decapoda-research/llama-13b-hf", peft_name='shibing624/llama-13b-belle-zh-lora',
args={'use_peft': True}, )
predict_sentence = generate_prompt("失眠怎么办?")
r = m.predict([predict_sentence])
print(r)
assert len(r) > 0
response, history = m.chat("你好", history=[])
print(response)
assert len(response) > 0
response, history = m.chat("晚上睡不着应该怎么办", history=history)
print(response)
assert len(response) > 0

predict_sentences = [generate_prompt(s) for s in sents]
res = m.predict(predict_sentences)
for s, i in zip(sents, res):
print(s, i)
print()
2 changes: 1 addition & 1 deletion textgen/__init__.py
@@ -4,7 +4,7 @@
@description:
"""

__version__ = '1.1.1'
__version__ = '1.1.2'

from textgen.augment.text_augment import TextAugment

1 change: 1 addition & 0 deletions textgen/config/model_args.py
@@ -392,3 +392,4 @@ class GptArgs(ModelArgs):
qlora: bool = False
preprocessing_num_workers: int = 4
prompt_template_name: str = "vicuna"
neft_alpha: int = 0 # 5
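Since `neft_alpha` is now a `GptArgs` field, it can presumably also be enabled through the `args` dict when constructing a model, the same way other options such as `use_peft` are passed elsewhere in this diff (a sketch under that assumption; `train_model` is assumed to be the usual training entry point):

```python
from textgen import GptModel

# neft_alpha=0 disables NEFTune (the default); the field comment suggests 5 as a typical value.
model = GptModel(
    'llama', "decapoda-research/llama-7b-hf",
    args={'use_peft': True, 'neft_alpha': 5},
)
# model.train_model(train_data)  # assumed entry point; see the repo's training demos
```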