GPT-4o pricing #360

Open · wants to merge 10 commits into master
9 changes: 6 additions & 3 deletions daras_ai_v2/language_model.py
@@ -318,13 +318,14 @@ def _deprecated(cls):
 
 def calc_gpt_tokens(
     prompt: str | list[str] | dict | list[dict],
+    used_model: str,
 ) -> int:
     if isinstance(prompt, (str, dict)):
         messages = [prompt]
     else:
         messages = prompt
     combined = msgs_to_prompt_str(messages)
-    return default_length_function(combined)
+    return default_length_function(combined, used_model)
 
 
 class ConversationEntry(typing_extensions.TypedDict):
@@ -810,14 +811,16 @@ def record_openai_llm_usage(
         model=used_model,
         sku=ModelSku.llm_prompt,
         quantity=sum(
-            default_length_function(get_entry_text(entry)) for entry in messages
+            default_length_function(get_entry_text(entry), used_model)
+            for entry in messages
         ),
     )
     record_cost_auto(
         model=used_model,
         sku=ModelSku.llm_completion,
         quantity=sum(
-            default_length_function(get_entry_text(entry)) for entry in choices
+            default_length_function(get_entry_text(entry), used_model)
+            for entry in choices
        ),
     )

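Since `used_model` is now a required parameter, every caller of `calc_gpt_tokens` has to thread a model name through. A minimal sketch of what a migrated call site looks like, with hypothetical values:

# before this PR: n_tokens = calc_gpt_tokens(prompt)
n_tokens = calc_gpt_tokens(
    [{"role": "user", "content": "Hello!"}],
    "gpt-4o",  # hypothetical; call sites in this PR pass model.name
)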
4 changes: 2 additions & 2 deletions daras_ai_v2/text_splitter.py
@@ -40,11 +40,11 @@
 threadlocal = threading.local()
 
 
-def default_length_function(text: str) -> int:
+def default_length_function(text: str, used_model: str = "gpt-4") -> int:
     try:
         enc = threadlocal.enc
     except AttributeError:
-        enc = tiktoken.encoding_for_model("gpt-4")
+        enc = tiktoken.encoding_for_model(used_model)
         threadlocal.enc = enc
     return len(enc.encode(text))
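One caveat with the change above: `threadlocal.enc` is keyed only by thread, so whichever model a thread sees first pins the encoding for every later call, even with a different `used_model`. Also, `tiktoken.encoding_for_model` raises `KeyError` for names it does not recognize (an enum member name like `gpt_4_o`, say, rather than the API id `gpt-4o`). A minimal sketch of a per-model cache with a fallback, assuming `cl100k_base` is an acceptable default encoding:

import threading

import tiktoken

threadlocal = threading.local()


def default_length_function(text: str, used_model: str = "gpt-4") -> int:
    # cache one encoding per model name, per thread
    try:
        encs = threadlocal.encs
    except AttributeError:
        encs = threadlocal.encs = {}
    try:
        enc = encs[used_model]
    except KeyError:
        try:
            enc = tiktoken.encoding_for_model(used_model)
        except KeyError:
            # unrecognized model name: fall back to a default encoding
            enc = tiktoken.get_encoding("cl100k_base")
        encs[used_model] = enc
    return len(enc.encode(text))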
92 changes: 58 additions & 34 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
@@ -45,7 +45,7 @@ pdftotext = "^2.2.2"
 "pdfminer.six" = "^20221105"
 google-api-python-client = "^2.80.0"
 oauth2client = "^4.1.3"
-tiktoken = "^0.3.2"
+tiktoken = "^0.7.0"
 google-cloud-translate = "^3.12.0"
 google-cloud-speech = "^2.21.0"
 yt-dlp = "^2023.3.4"
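This version bump is load-bearing: GPT-4o uses the `o200k_base` encoding, which tiktoken only ships from 0.7.0 onward, so `encoding_for_model("gpt-4o")` cannot resolve on the old pin. A quick sanity check, assuming tiktoken >= 0.7.0 is installed:

import tiktoken

enc = tiktoken.encoding_for_model("gpt-4o")
print(enc.name)  # o200k_base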
3 changes: 2 additions & 1 deletion recipes/DocSummary.py
@@ -216,7 +216,8 @@ def _map_reduce(request: "DocSummaryPage.RequestModel", full_text: str, state: d
 
     safety_buffer = 100
     prompt_token_count = (
-        calc_gpt_tokens(task_instructions + merge_instructions) + safety_buffer
+        calc_gpt_tokens(task_instructions + merge_instructions, model.name)
+        + safety_buffer
     )
 
     # to merge 2 outputs, we need to have at least 1/3 of the max tokens available
7 changes: 4 additions & 3 deletions recipes/SEOSummary.py
@@ -368,10 +368,11 @@ def _gen_final_prompt(
         ]
     )
 
+    used_model = LargeLanguageModels[request.selected_model]
     max_allowed_tokens = (
-        LargeLanguageModels[request.selected_model].context_window
+        used_model.context_window
         - request.max_tokens
-        - calc_gpt_tokens(end_input_prompt)
+        - calc_gpt_tokens(end_input_prompt, used_model.name)
     )
 
     state["final_prompt"] = request.task_instructions.strip() + "\n\n"
@@ -399,7 +400,7 @@ def _gen_final_prompt(
 
     # used too many tokens, abort!
     if (
-        calc_gpt_tokens(state["final_prompt"] + next_prompt_part)
+        calc_gpt_tokens(state["final_prompt"] + next_prompt_part, used_model.name)
         > max_allowed_tokens
     ):
         continue
10 changes: 6 additions & 4 deletions recipes/VideoBots.py
@@ -835,7 +835,7 @@ def run_v2(
         query_msgs = request.messages + [
             format_chat_entry(role=CHATML_ROLE_USER, content=user_input)
         ]
-        clip_idx = convo_window_clipper(query_msgs, model.context_window // 2)
+        clip_idx = convo_window_clipper(query_msgs, model.context_window // 2, model.name)
         query_msgs = query_msgs[clip_idx:]
 
         chat_history = messages_as_prompt(query_msgs)
@@ -911,13 +911,14 @@ def run_v2(
         # truncate the history to fit the model's max tokens
         max_history_tokens = (
             model.context_window
-            - calc_gpt_tokens(filter(None, [system_prompt, user_input]))
+            - calc_gpt_tokens(filter(None, [system_prompt, user_input]), model.name)
             - request.max_tokens
             - SAFETY_BUFFER
         )
         clip_idx = convo_window_clipper(
             request.messages,
             max_history_tokens,
+            model.name,
         )
         history_prompt = request.messages[clip_idx:]
         response.final_prompt = list(
@@ -926,7 +927,7 @@ def run_v2(
 
         # ensure input script is not too big
         max_allowed_tokens = model.context_window - calc_gpt_tokens(
-            response.final_prompt
+            response.final_prompt, model.name
         )
         max_allowed_tokens = min(max_allowed_tokens, request.max_tokens)
         if max_allowed_tokens < 0:
@@ -1565,9 +1566,10 @@ def convo_window_clipper(
     max_tokens,
+    used_model="gpt-4",
     *,
     step=2,
 ):
     for i in range(len(window) - 2, -1, -step):
-        if calc_gpt_tokens(window[i:]) > max_tokens:
+        if calc_gpt_tokens(window[i:], used_model) > max_tokens:
             return i + step
     return 0

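Note that `used_model` must be accepted before the `*` marker (as shown above), since both call sites in this file pass it as a third positional argument; placing it after `step` would make it keyword-only and raise a TypeError. For context, `convo_window_clipper` walks backward from the end of the conversation two entries at a time (keeping user/assistant pairs together); as soon as the tail `window[i:]` exceeds `max_tokens` it returns `i + step`, the most recent slice that still fit, and returns 0 if the whole history fits. A hypothetical usage sketch:

# hypothetical history; clip to a 4096-token budget with gpt-4o's tokenizer
msgs = [
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello! How can I help?"},
    {"role": "user", "content": "Summarize this article..."},
    {"role": "assistant", "content": "Sure, here is a summary..."},
]
clip_idx = convo_window_clipper(msgs, 4096, "gpt-4o")
msgs = msgs[clip_idx:]  # drops the oldest pairs only once over budget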
27 changes: 27 additions & 0 deletions scripts/init_llm_pricing.py
@@ -5,6 +5,33 @@
 
 
 def run():
+    # GPT-4o
+
+    ModelPricing.objects.get_or_create(
+        model_id="gpt-4o",
+        sku=ModelSku.llm_prompt,
+        defaults=dict(
+            model_name=LargeLanguageModels.gpt_4_o.name,
+            unit_cost=0.005,
+            unit_quantity=1000,
+            category=category,
+            provider=ModelProvider.openai,
+            pricing_url="https://openai.com/api/pricing/",
+        ),
+    )
+    ModelPricing.objects.get_or_create(
+        model_id="gpt-4o",
+        sku=ModelSku.llm_completion,
+        defaults=dict(
+            model_name=LargeLanguageModels.gpt_4_o.name,
+            unit_cost=0.015,
+            unit_quantity=1000,
+            category=category,
+            provider=ModelProvider.openai,
+            pricing_url="https://openai.com/api/pricing/",
+        ),
+    )
+
     # GPT-4-Turbo
 
     for model in ["gpt-4-0125-preview", "gpt-4-1106-preview"]:
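With these two rows, a GPT-4o call is billed at $0.005 per 1,000 prompt tokens and $0.015 per 1,000 completion tokens, matching the OpenAI pricing page linked in `pricing_url`. A worked example with hypothetical token counts:

# hypothetical usage: 1,200 prompt tokens + 300 completion tokens
prompt_cost = 1200 / 1000 * 0.005      # $0.0060
completion_cost = 300 / 1000 * 0.015   # $0.0045
total = prompt_cost + completion_cost  # $0.0105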