Add medical and finance dataset #769

Open · wants to merge 5 commits into base: llm
26 changes: 26 additions & 0 deletions federatedscope/llm/dataloader/dataloader.py
@@ -279,6 +279,32 @@ def load_llm_dataset(config=None, **kwargs):
            list_data_dict[i]['output'] = \
                list_data_dict[i]['output'].replace('####', 'The answer is')
        dataset = LLMDataset(list_data_dict, tokenizer)
    elif dataset_name.lower() == "medical_tc":
        fp = os.path.join(config.data.root, 'medical_tc_train.jsonl')
        if not os.path.exists(fp):
            # download_url saves under the URL's basename, i.e. directly
            # at `fp`, so no rename of 'train.jsonl' is needed afterwards
            # (the original rename targeted a file that never exists).
            download_url(
                'https://federatedscope.oss-cn-beijing.aliyuncs.com/FS-LLM'
                '/medical_tc_train.jsonl', config.data.root)
        # medical_tc carries no separate category field; the label
        # doubles as the category.
        list_data_dict = load_jsonl(fp,
                                    instruction='instruction',
                                    input='input',
                                    output='output',
                                    category='output')
        dataset = LLMDataset(list_data_dict, tokenizer)
    elif dataset_name.lower() == "finance":
        fp = os.path.join(config.data.root, 'finance_train_data.jsonl')
        if not os.path.exists(fp):
            # As above, the downloaded file already lands at `fp`.
            download_url(
                'https://federatedscope.oss-cn-beijing.aliyuncs.com/FS-LLM'
                '/finance_train_data.jsonl', config.data.root)
        list_data_dict = load_jsonl(fp,
                                    instruction='instruction',
                                    input='input',
                                    output='output',
                                    category='category')
        dataset = LLMDataset(list_data_dict, tokenizer)
    elif dataset_name.lower() == 'code_search_net':
        from tqdm import tqdm
        from federatedscope.llm.dataset.code_search_net import \
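For context on the `load_jsonl` calls above, here is a minimal standalone sketch of the per-line key mapping they rely on. The record and the `map_jsonl_line` helper are hypothetical illustrations, not the library's actual implementation:

```python
import json

# Hypothetical single-record .jsonl payload in the shape the new
# branches expect.
RAW = ('{"instruction": "Classify the medical abstract into a specialty.", '
       '"input": "Patient presents with chest pain ...", '
       '"output": "Cardiology"}')


def map_jsonl_line(line, instruction='instruction', input='input',
                   output='output', category='category'):
    """Simplified sketch of load_jsonl's per-line key mapping."""
    item = json.loads(line)
    return {
        'instruction': item.get(instruction),
        'input': item.get(input),
        'output': item.get(output),
        'category': item.get(category),
    }


# Mirrors category='output' in the medical_tc branch above: with no
# separate category key in the record, the label doubles as one.
print(map_jsonl_line(RAW, category='output'))
```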
179 changes: 179 additions & 0 deletions federatedscope/llm/eval/eval_for_finance/eval.py
@@ -0,0 +1,179 @@
import os
import torch
import numpy as np
import pandas as pd
import json
import transformers

from federatedscope.core.configs.config import global_cfg
from federatedscope.core.cmd_args import parse_args, parse_client_cfg
from federatedscope.core.auxiliaries.utils import setup_seed
from federatedscope.core.auxiliaries.logging import update_logger
from federatedscope.llm.misc.fschat import FSChatBot
from federatedscope.core.data.utils import download_url

# import torch._dynamo
# torch._dynamo.config.suppress_errors = True

transformers.logging.set_verbosity(40)

choices = ["A", "B", "C", "D"]


def format_subject(subject):
    ll = subject.split("_")
    s = ""
    for entry in ll:
        s += " " + entry
    return s


def format_example(df, idx, include_answer=True):
    prompt = df.iloc[idx, 0]
    k = df.shape[1] - 2
    for j in range(k):
        prompt += "\n{}. {}".format(choices[j], df.iloc[idx, j + 1])
    prompt += "\nAnswer:"
    if include_answer:
        prompt += " {}\n\n".format(df.iloc[idx, k + 1])
    return prompt


def gen_prompt(train_df, subject, k=-1):
    prompt = ("The following are multiple choice questions "
              "(with answers) about {}.\n\n".format(format_subject(subject)))
    if k == -1:
        k = train_df.shape[0]
    for i in range(k):
        prompt += format_example(train_df, i)
    return prompt
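To make the assembled prompt concrete, a toy run of the two helpers above on a fabricated one-row frame (placeholder data, not drawn from FinEval; assumes `format_example` and `gen_prompt` are in scope):

```python
import pandas as pd

# Columns: question, choices A-D, gold answer — the layout
# format_example expects after the id column is dropped.
toy = pd.DataFrame([[
    "Which statement about bond prices is true?",
    "They rise when yields rise",
    "They fall when yields rise",
    "They are unrelated to yields",
    "They never change",
    "B",
]])
print(gen_prompt(toy, "corporate_finance", k=1))
# Expected output (format_subject prepends a space, hence the double
# space after "about"):
#
# The following are multiple choice questions (with answers) about  corporate finance.
#
# Which statement about bond prices is true?
# A. They rise when yields rise
# B. They fall when yields rise
# C. They are unrelated to yields
# D. They never change
# Answer: B
```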


@torch.no_grad()
def eval(subject, model, tokenizer, test_df, device):
    cors = []
    all_probs = []

    for i in range(test_df.shape[0]):
        # get prompt and make sure it fits
        prompt = format_example(test_df, i, include_answer=False)

        # Truncate up front: the original loop re-tokenized the unchanged
        # prompt whenever it exceeded 1024 tokens, so it could never
        # terminate for long prompts.
        input_ids = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=min(tokenizer.model_max_length, 1024),
        ).input_ids.to(device)

        label = test_df.iloc[i, test_df.shape[1] - 1]

        logits = model(input_ids=input_ids).logits[0, -1]

        # Score by comparing only the logits of the four answer letters.
        probs = (torch.nn.functional.softmax(
            torch.tensor([
                logits[tokenizer("A").input_ids[-1]],
                logits[tokenizer("B").input_ids[-1]],
                logits[tokenizer("C").input_ids[-1]],
                logits[tokenizer("D").input_ids[-1]],
            ]).float(),
            dim=0,
        ).detach().cpu().numpy())
        pred = {0: "A", 1: "B", 2: "C", 3: "D"}[np.argmax(probs)]

        cor = pred == label
        cors.append(cor)
        all_probs.append(probs)

    acc = np.mean(cors)
    cors = np.array(cors)

    all_probs = np.array(all_probs)
    print("Average accuracy {:.3f} - {}".format(acc, subject))

    return cors, acc, all_probs
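The scoring above never generates text: it only ranks the next-token logits of the four letter tokens. That ranking step can be sanity-checked in isolation with a stand-in logits vector (a minimal sketch, no model required):

```python
import numpy as np
import torch

# Pretend these are the final-position logits at the token ids of
# "A", "B", "C", "D".
letter_logits = torch.tensor([1.2, 3.4, 0.5, -0.1])
probs = torch.nn.functional.softmax(letter_logits, dim=0).numpy()
pred = {0: "A", 1: "B", 2: "C", 3: "D"}[int(np.argmax(probs))]
print(pred, probs.round(3))  # "B" — the highest-logit letter wins
```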


def main():
    init_cfg = global_cfg.clone()
    args = parse_args()

    if args.cfg_file:
        init_cfg.merge_from_file(args.cfg_file)
    cfg_opt, client_cfg_opt = parse_client_cfg(args.opts)
    init_cfg.merge_from_list(cfg_opt)

    update_logger(init_cfg, clear_before_add=True)
    setup_seed(init_cfg.seed)

    # load your finetuned model (saved as xxx.ckpt)
    # in yaml file federate.save_to
    fschatbot = FSChatBot(init_cfg)
    tokenizer = fschatbot.tokenizer
    model = fschatbot.model
    device = fschatbot.device

    # Check under data.root rather than a hard-coded "data/" prefix, so
    # the existence check and the reads below agree on the location.
    data_dir = os.path.join(init_cfg.data.root, "FinEval")
    if not os.path.exists(data_dir):
        download_url(
            "https://federatedscope.oss-cn-beijing.aliyuncs.com/FS"
            "-LLM/FinEval.zip", init_cfg.data.root)
        print("Please unzip the file and rerun")
        return

    eval_dir = "finance_eval_result"

    subjects = sorted([
        f.split("_dev.csv")[0]
        for f in os.listdir(os.path.join(data_dir, "dev")) if "_dev.csv" in f
    ])

    results_dir = os.path.join(
        eval_dir, "results_{}".format(init_cfg.federate.save_to))
    os.makedirs(results_dir, exist_ok=True)

    all_cors = []

    for subject in subjects:
        # Note: reads the labelled dev split, matching the subject list
        # built above.
        test_df = pd.read_csv(os.path.join(data_dir, "dev",
                                           subject + "_dev.csv"),
                              header=None)
        test_df = test_df.iloc[:, 1:7]

        cors, acc, probs = eval(subject, model, tokenizer, test_df, device)
        all_cors.append(cors)

        test_df["{}_correct".format(init_cfg.federate.save_to)] = cors
        for j in range(probs.shape[1]):
            choice = choices[j]
            test_df["{}_choice{}_probs".format(init_cfg.federate.save_to,
                                               choice)] = probs[:, j]
        test_df.to_csv(
            os.path.join(results_dir, "{}.csv".format(subject)),
            index=None,
        )

    results = {"subcategories": {}, "categories": {}}

    weighted_acc = np.mean(np.concatenate(all_cors))
    results["weighted_accuracy"] = weighted_acc
    print("Average accuracy: {:.3f}".format(weighted_acc))

    results_file = os.path.join(
        eval_dir, "accuracies_{}.json".format(
            init_cfg.federate.save_to.replace("/", "_")))
    with open(results_file, "w") as f:
        json.dump(results, f)


if __name__ == "__main__":
    main()
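Like the other FS-LLM eval scripts, this one is presumably launched as `python federatedscope/llm/eval/eval_for_finance/eval.py --cfg <your_sft_config>.yaml` (the config name is a placeholder), with `federate.save_to` in the YAML pointing at the finetuned checkpoint. Per-subject CSVs then land in `finance_eval_result/results_<save_to>/`, and the aggregate accuracy in `accuracies_<save_to>.json`.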
75 changes: 75 additions & 0 deletions federatedscope/llm/eval/eval_for_medical/eval.py
@@ -0,0 +1,75 @@
import os

import numpy as np
import transformers
from tqdm import tqdm

from federatedscope.core.configs.config import global_cfg
from federatedscope.core.cmd_args import parse_args, parse_client_cfg
from federatedscope.core.auxiliaries.utils import setup_seed
from federatedscope.core.auxiliaries.logging import update_logger
from federatedscope.core.data.utils import download_url
from federatedscope.llm.dataloader.dataloader import load_jsonl
from federatedscope.llm.misc.fschat import FSChatBot

transformers.logging.set_verbosity(40)

DEBUG = False


def is_correct(model_answer, answer):
    return model_answer == answer


def main():
    init_cfg = global_cfg.clone()
    args = parse_args()

    if args.cfg_file:
        init_cfg.merge_from_file(args.cfg_file)
    cfg_opt, client_cfg_opt = parse_client_cfg(args.opts)
    init_cfg.merge_from_list(cfg_opt)

    update_logger(init_cfg, clear_before_add=True)
    setup_seed(init_cfg.seed)

    # load your finetuned model (saved as xxx.ckpt)
    # in yaml file federate.save_to
    fschatbot = FSChatBot(init_cfg)

    # Get test file
    fp = os.path.join(init_cfg.data.root, "medical_tc_test.jsonl")
    if not os.path.exists(fp):
        # download_url saves under the URL's basename, i.e. directly at
        # `fp`, so no rename of 'test.jsonl' is needed afterwards.
        download_url(
            'https://federatedscope.oss-cn-beijing.aliyuncs.com/FS-LLM'
            '/medical_tc_test.jsonl', init_cfg.data.root)

    list_data_dict = load_jsonl(fp,
                                instruction='instruction',
                                input='input',
                                output='output',
                                category='output')

    answers = []
    for sample in tqdm(list_data_dict):
        input_text = sample['instruction'] + sample["input"]
        generate_kwargs = dict(max_new_tokens=256, top_p=0.95, temperature=0.8)
        model_answer = fschatbot.generate(input_text, generate_kwargs)

        # Only the first characters are compared, so any generation that
        # starts with the right class label counts as correct.
        is_cor = is_correct(model_answer[0], sample['output'][0])
        answers.append(is_cor)
        if DEBUG:
            print(f'Full input_text:\n{input_text}\n\n')
            print(f'Question: {sample["instruction"]}\n\n'
                  f'Answers: {sample["output"]}\n\n'
                  f'Model Answers: {model_answer}\n\n'
                  f'Is correct: {is_cor}\n\n')

    print(f'Num of total questions: {len(answers)}, '
          f'correct num: {sum(answers)}, '
          f'correct rate: {float(sum(answers))/len(answers)}.')


if __name__ == "__main__":
    main()
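As the comment in the loop notes, correctness here hinges on the first character of the generation alone. A stricter matcher one could swap in (a sketch, not part of this PR; `is_correct_strict` is a hypothetical name):

```python
def is_correct_strict(model_answer: str, answer: str) -> bool:
    """Count a prediction as correct only if the generation starts with
    the full gold label, ignoring case and surrounding whitespace."""
    return model_answer.strip().lower().startswith(answer.strip().lower())


# usage inside the loop: is_correct_strict(model_answer, sample['output'])
```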
3 changes: 3 additions & 0 deletions federatedscope/llm/eval/eval_for_mmlu/eval.py
@@ -16,6 +16,9 @@
from federatedscope.core.data.utils import download_url
import tarfile

# import torch._dynamo
# torch._dynamo.config.suppress_errors = True

transformers.logging.set_verbosity(40)

choices = ["A", "B", "C", "D"]