forked from ggerganov/llama.cpp
-
Notifications
You must be signed in to change notification settings - Fork 0
/
test-lora-conversion-inference.sh
executable file
·139 lines (116 loc) · 5.3 KB
/
test-lora-conversion-inference.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#!/bin/bash
set -e
# Array of models to iterate over
declare -a params=(
"Gemma2ForCausalLM 64"
"LlamaForCausalLM 64"
"Phi3ForCausalLM 64"
)
MODELS_REPO=lora-tests
MODELS_REPO_URL=https://huggingface.co/ggml-org/$MODELS_REPO
# Clone the Hugging Face repository if the directory does not exist
if [ ! -d "$MODELS_REPO" ]; then
echo "Cloning the Hugging Face repository..."
git clone $MODELS_REPO_URL --depth 1
else
echo "Repository already exists. Skipping clone."
fi
# Array to store results to print
results=()
trim_leading_whitespace() {
local input_string="$1"
echo "${input_string#"${input_string%%[![:space:]]*}"}"
}
extract_starting_substring() {
local reference_string="$1"
local target_string="$2"
local target_length=${#target_string}
echo "${reference_string:0:$target_length}"
}
get_first_word() {
local input_string="$1"
read -r first_word _ <<< "$input_string"
echo "$first_word"
}
# Load the expected strings
EXPECTED_BASE_FULL=$(cat $MODELS_REPO/data/pale_blue_dot.txt)
EXPECTED_LORA_FULL=$(cat $MODELS_REPO/data/bohemian_rhapsody.txt)
EXPECTED_BASE_FIRST_WORD=$(get_first_word "$EXPECTED_BASE_FULL")
EXPECTED_LORA_FIRST_WORD=$(get_first_word "$EXPECTED_LORA_FULL")
run_conversion_and_inference_lora() {
local model_name=$1
local hidden_size=$2
echo -e "\n\n-------- RUNNING TEST FOR MODEL $model_name --------\n\n"
# Convert safetensors to gguf
echo "Running convert_hf_to_gguf.py for $model_name with hidden_size $hidden_size..."
python convert_hf_to_gguf.py $MODELS_REPO/$model_name/hidden_size=$hidden_size/base \
--outfile $MODELS_REPO/$model_name/hidden_size=$hidden_size/base/Base-F32.gguf \
--outtype f32
echo -e "\n\n---------------------------\n\n"
echo "Running convert_lora_to_gguf.py for $model_name with hidden_size $hidden_size..."
python3 convert_lora_to_gguf.py $MODELS_REPO/$model_name/hidden_size=$hidden_size/lora \
--base $MODELS_REPO/$model_name/hidden_size=$hidden_size/base \
--outtype f32
echo -e "\n\n---------------------------\n\n"
echo "Running llama-export-lora with lora for $model_name with hidden_size $hidden_size..."
./llama-export-lora \
-m $MODELS_REPO/$model_name/hidden_size=$hidden_size/base/Base-F32.gguf \
-o $MODELS_REPO/$model_name/hidden_size=$hidden_size/base/Base-F32-lora-merged.gguf \
--lora $MODELS_REPO/$model_name/hidden_size=$hidden_size/lora/Lora-F32-LoRA.gguf
# Run inference
echo -e "\n\n---------------------------\n\n"
echo "Running llama-cli without lora for $model_name with hidden_size $hidden_size..."
OUTPUT_BASE=$(./llama-cli -m $MODELS_REPO/$model_name/hidden_size=$hidden_size/base/Base-F32.gguf \
-p "$EXPECTED_BASE_FIRST_WORD" -n 50 --seed 42 --temp 0)
echo -e "\n\n---------------------------\n\n"
echo "Running llama-cli with hot lora for $model_name with hidden_size $hidden_size..."
OUTPUT_LORA_HOT=$(./llama-cli -m $MODELS_REPO/$model_name/hidden_size=$hidden_size/base/Base-F32.gguf \
--lora $MODELS_REPO/$model_name/hidden_size=$hidden_size/lora/Lora-F32-LoRA.gguf \
-p "$EXPECTED_LORA_FIRST_WORD" -n 50 --seed 42 --temp 0)
echo -e "\n\n---------------------------\n\n"
echo "Running llama-cli with merged lora for $model_name with hidden_size $hidden_size..."
OUTPUT_LORA_MERGED=$(./llama-cli -m $MODELS_REPO/$model_name/hidden_size=$hidden_size/base/Base-F32-lora-merged.gguf \
-p "$EXPECTED_LORA_FIRST_WORD" -n 50 --seed 42 --temp 0)
# Remove any initial white space
OUTPUT_BASE=$(trim_leading_whitespace "$OUTPUT_BASE")
OUTPUT_LORA_HOT=$(trim_leading_whitespace "$OUTPUT_LORA_HOT")
OUTPUT_LORA_MERGED=$(trim_leading_whitespace "$OUTPUT_LORA_MERGED")
# Extract the corresponding substring from full string
EXPECTED_BASE=$(extract_starting_substring "$EXPECTED_BASE_FULL" "$OUTPUT_BASE")
EXPECTED_LORA=$(extract_starting_substring "$EXPECTED_LORA_FULL" "$OUTPUT_LORA_HOT")
# Assert output equals the expected output
if [[ "$OUTPUT_BASE" != "$EXPECTED_BASE" ]]; then
echo "Error: $model_name OUTPUT_BASE does not start with the expected string."
echo -e "Out=$OUTPUT_BASE\n\nExp=$EXPECTED_BASE"
exit 1
fi
if [[ "$OUTPUT_LORA_HOT" != "$EXPECTED_LORA" ]]; then
echo "Error: $model_name OUTPUT_LORA_HOT does not start with the expected string."
echo -e "Out=$OUTPUT_LORA_HOT\n\nExp=$EXPECTED_LORA"
exit 1
fi
if [[ "$OUTPUT_LORA_MERGED" != "$EXPECTED_LORA" ]]; then
echo "Error: $model_name OUTPUT_LORA_MERGED does not start with the expected string."
echo -e "Out=$OUTPUT_LORA_MERGED\n\nExp=$EXPECTED_LORA"
exit 1
fi
# Store the results
results+=("
\n\033[1mResults for $model_name with hidden_size $hidden_size:\033[0m
\n\033[32m • Base:\n$OUTPUT_BASE
\n\033[34m • Lora hot:\n$OUTPUT_LORA_HOT
\n\033[36m • Lora merged:\n$OUTPUT_LORA_MERGED
\n \033[0m
")
echo "All tests passed for $model_name with hidden_size $hidden_size!"
}
# Run test for each model
for param in "${params[@]}"; do
run_conversion_and_inference_lora $param
done
# Print results
echo -e "\n\n---------------------------\n\n"
echo -e "\n\033[1mSummary of All Results:\033[0m"
for result in "${results[@]}"; do
echo -e "$result"
done