forked from sashabaranov/go-openai
-
Notifications
You must be signed in to change notification settings - Fork 0
/
completion.go
212 lines (194 loc) · 8.15 KB
/
completion.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
package openai
import (
"context"
"errors"
"net/http"
)
var (
ErrCompletionUnsupportedModel = errors.New("this model is not supported with this method, please use CreateChatCompletion client method instead") //nolint:lll
ErrCompletionStreamNotSupported = errors.New("streaming is not supported with this method, please use CreateCompletionStream") //nolint:lll
ErrCompletionRequestPromptTypeNotSupported = errors.New("the type of CompletionRequest.Prompt only supports string and []string") //nolint:lll
)
// GPT3 Defines the models provided by OpenAI to use when generating
// completions from OpenAI.
// GPT3 Models are designed for text-based tasks. For code-specific
// tasks, please refer to the Codex series of models.
const (
GPT432K0613 = "gpt-4-32k-0613"
GPT432K0314 = "gpt-4-32k-0314"
GPT432K = "gpt-4-32k"
GPT40613 = "gpt-4-0613"
GPT40314 = "gpt-4-0314"
GPT4Turbo = "gpt-4-turbo"
GPT4Turbo20240409 = "gpt-4-turbo-2024-04-09"
GPT4TurboPreview = "gpt-4-turbo-preview"
GPT4Turbo0125 = "gpt-4-0125-preview"
GPT4Turbo1106 = "gpt-4-1106-preview"
GPT4VisionPreview = "gpt-4-vision-preview"
GPT4 = "gpt-4"
GPT3Dot5Turbo0125 = "gpt-3.5-turbo-0125"
GPT3Dot5Turbo1106 = "gpt-3.5-turbo-1106"
GPT3Dot5Turbo0613 = "gpt-3.5-turbo-0613"
GPT3Dot5Turbo0301 = "gpt-3.5-turbo-0301"
GPT3Dot5Turbo16K = "gpt-3.5-turbo-16k"
GPT3Dot5Turbo16K0613 = "gpt-3.5-turbo-16k-0613"
GPT3Dot5Turbo = "gpt-3.5-turbo"
GPT3Dot5TurboInstruct = "gpt-3.5-turbo-instruct"
// Deprecated: Will be shut down on January 04, 2024. Use gpt-3.5-turbo-instruct instead.
GPT3TextDavinci003 = "text-davinci-003"
// Deprecated: Will be shut down on January 04, 2024. Use gpt-3.5-turbo-instruct instead.
GPT3TextDavinci002 = "text-davinci-002"
// Deprecated: Will be shut down on January 04, 2024. Use gpt-3.5-turbo-instruct instead.
GPT3TextCurie001 = "text-curie-001"
// Deprecated: Will be shut down on January 04, 2024. Use gpt-3.5-turbo-instruct instead.
GPT3TextBabbage001 = "text-babbage-001"
// Deprecated: Will be shut down on January 04, 2024. Use gpt-3.5-turbo-instruct instead.
GPT3TextAda001 = "text-ada-001"
// Deprecated: Will be shut down on January 04, 2024. Use gpt-3.5-turbo-instruct instead.
GPT3TextDavinci001 = "text-davinci-001"
// Deprecated: Will be shut down on January 04, 2024. Use gpt-3.5-turbo-instruct instead.
GPT3DavinciInstructBeta = "davinci-instruct-beta"
GPT3Davinci = "davinci"
GPT3Davinci002 = "davinci-002"
// Deprecated: Will be shut down on January 04, 2024. Use gpt-3.5-turbo-instruct instead.
GPT3CurieInstructBeta = "curie-instruct-beta"
GPT3Curie = "curie"
GPT3Curie002 = "curie-002"
GPT3Ada = "ada"
GPT3Ada002 = "ada-002"
GPT3Babbage = "babbage"
GPT3Babbage002 = "babbage-002"
LLAMA2DOT70B = "meta-llama/Llama-2-70b-chat-hf"
LLAMA3DOT70B = "meta-llama/Llama-3-70b-chat-hf"
CODELLAMA70B = "codellama/CodeLlama-70b-Instruct-hf"
MISTRALInstructDOT7B = "mistralai/Mistral-7B-Instruct-v0.1"
MIXTRALInstructDOT8x7B = "mistralai/Mixtral-8x7B-Instruct-v0.1"
MIXTRALInstructDOT8x22B = "mistralai/Mixtral-8x22B-Instruct-v0.1"
OPENORCAMistralOpenOrca7B = "Open-Orca/Mistral-7B-OpenOrca"
)
// Codex Defines the models provided by OpenAI.
// These models are designed for code-specific tasks, and use
// a different tokenizer which optimizes for whitespace.
const (
CodexCodeDavinci002 = "code-davinci-002"
CodexCodeCushman001 = "code-cushman-001"
CodexCodeDavinci001 = "code-davinci-001"
)
var disabledModelsForEndpoints = map[string]map[string]bool{
"/completions": {
GPT3Dot5Turbo: true,
GPT3Dot5Turbo0301: true,
GPT3Dot5Turbo0613: true,
GPT3Dot5Turbo1106: true,
GPT3Dot5Turbo16K: true,
GPT3Dot5Turbo16K0613: true,
GPT4: true,
GPT4TurboPreview: true,
GPT4VisionPreview: true,
GPT40314: true,
GPT40613: true,
GPT432K: true,
GPT432K0314: true,
GPT432K0613: true,
},
chatCompletionsSuffix: {
CodexCodeDavinci002: true,
CodexCodeCushman001: true,
CodexCodeDavinci001: true,
GPT3TextDavinci003: true,
GPT3TextDavinci002: true,
GPT3TextCurie001: true,
GPT3TextBabbage001: true,
GPT3TextAda001: true,
GPT3TextDavinci001: true,
GPT3DavinciInstructBeta: true,
GPT3Davinci: true,
GPT3CurieInstructBeta: true,
GPT3Curie: true,
GPT3Ada: true,
GPT3Babbage: true,
},
}
func checkEndpointSupportsModel(endpoint, model string) bool {
return !disabledModelsForEndpoints[endpoint][model]
}
func checkPromptType(prompt any) bool {
_, isString := prompt.(string)
_, isStringSlice := prompt.([]string)
return isString || isStringSlice
}
// CompletionRequest represents a request structure for completion API.
type CompletionRequest struct {
Model string `json:"model"`
Prompt any `json:"prompt,omitempty"`
Suffix string `json:"suffix,omitempty"`
MaxTokens int `json:"max_tokens,omitempty"`
Temperature float32 `json:"temperature,omitempty"`
TopP float32 `json:"top_p,omitempty"`
N int `json:"n,omitempty"`
Stream bool `json:"stream,omitempty"`
LogProbs int `json:"logprobs,omitempty"`
Echo bool `json:"echo,omitempty"`
Stop []string `json:"stop,omitempty"`
PresencePenalty float32 `json:"presence_penalty,omitempty"`
FrequencyPenalty float32 `json:"frequency_penalty,omitempty"`
BestOf int `json:"best_of,omitempty"`
// LogitBias is must be a token id string (specified by their token ID in the tokenizer), not a word string.
// incorrect: `"logit_bias":{"You": 6}`, correct: `"logit_bias":{"1639": 6}`
// refs: https://platform.openai.com/docs/api-reference/completions/create#completions/create-logit_bias
LogitBias map[string]int `json:"logit_bias,omitempty"`
User string `json:"user,omitempty"`
}
// CompletionChoice represents one of possible completions.
type CompletionChoice struct {
Text string `json:"text"`
Index int `json:"index"`
FinishReason string `json:"finish_reason"`
LogProbs LogprobResult `json:"logprobs"`
}
// LogprobResult represents logprob result of Choice.
type LogprobResult struct {
Tokens []string `json:"tokens"`
TokenLogprobs []float32 `json:"token_logprobs"`
TopLogprobs []map[string]float32 `json:"top_logprobs"`
TextOffset []int `json:"text_offset"`
}
// CompletionResponse represents a response structure for completion API.
type CompletionResponse struct {
ID string `json:"id"`
Object string `json:"object"`
Created int64 `json:"created"`
Model string `json:"model"`
Choices []CompletionChoice `json:"choices"`
Usage Usage `json:"usage"`
httpHeader
}
// CreateCompletion — API call to create a completion. This is the main endpoint of the API. Returns new text as well
// as, if requested, the probabilities over each alternative token at each position.
//
// If using a fine-tuned model, simply provide the model's ID in the CompletionRequest object,
// and the server will use the model's parameters to generate the completion.
func (c *Client) CreateCompletion(
ctx context.Context,
request CompletionRequest,
) (response CompletionResponse, err error) {
if request.Stream {
err = ErrCompletionStreamNotSupported
return
}
urlSuffix := "/completions"
if !checkEndpointSupportsModel(urlSuffix, request.Model) {
err = ErrCompletionUnsupportedModel
return
}
if !checkPromptType(request.Prompt) {
err = ErrCompletionRequestPromptTypeNotSupported
return
}
req, err := c.newRequest(ctx, http.MethodPost, c.fullURL(urlSuffix, request.Model), withBody(request))
if err != nil {
return
}
err = c.sendRequest(req, &response)
return
}