GPT-4o pricing #360

Open · wants to merge 10 commits into master
9 changes: 6 additions & 3 deletions daras_ai_v2/language_model.py
@@ -318,13 +318,14 @@ def _deprecated(cls):
 
 def calc_gpt_tokens(
     prompt: str | list[str] | dict | list[dict],
+    used_model: str,
 ) -> int:
     if isinstance(prompt, (str, dict)):
         messages = [prompt]
     else:
         messages = prompt
     combined = msgs_to_prompt_str(messages)
-    return default_length_function(combined)
+    return default_length_function(combined, used_model)
 
 
 class ConversationEntry(typing_extensions.TypedDict):
@@ -810,14 +811,16 @@ def record_openai_llm_usage(
         model=used_model,
         sku=ModelSku.llm_prompt,
         quantity=sum(
-            default_length_function(get_entry_text(entry)) for entry in messages
+            default_length_function(get_entry_text(entry), used_model)
+            for entry in messages
         ),
     )
     record_cost_auto(
         model=used_model,
         sku=ModelSku.llm_completion,
         quantity=sum(
-            default_length_function(get_entry_text(entry)) for entry in choices
+            default_length_function(get_entry_text(entry), used_model)
+            for entry in choices
        ),
     )

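Since `used_model` is now a required parameter, every caller of `calc_gpt_tokens` has to thread a model name through. A minimal sketch of what a migrated call site looks like, with hypothetical values:

# before this PR: n_tokens = calc_gpt_tokens(prompt)
n_tokens = calc_gpt_tokens(
    [{"role": "user", "content": "Hello!"}],
    "gpt-4o",  # hypothetical; call sites in this PR pass model.name
)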
4 changes: 2 additions & 2 deletions daras_ai_v2/text_splitter.py
@@ -40,11 +40,11 @@
 threadlocal = threading.local()
 
 
-def default_length_function(text: str) -> int:
+def default_length_function(text: str, used_model: str = "gpt-4") -> int:
     try:
         enc = threadlocal.enc
     except AttributeError:
-        enc = tiktoken.encoding_for_model("gpt-4")
+        enc = tiktoken.encoding_for_model(used_model)
         threadlocal.enc = enc
     return len(enc.encode(text))
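One caveat with the change above: `threadlocal.enc` is keyed only by thread, so whichever model a thread sees first pins the encoding for every later call, even with a different `used_model`. Also, `tiktoken.encoding_for_model` raises `KeyError` for names it does not recognize (an enum member name like `gpt_4_o`, say, rather than the API id `gpt-4o`). A minimal sketch of a per-model cache with a fallback, assuming `cl100k_base` is an acceptable default encoding:

import threading

import tiktoken

threadlocal = threading.local()


def default_length_function(text: str, used_model: str = "gpt-4") -> int:
    # cache one encoding per model name, per thread
    try:
        encs = threadlocal.encs
    except AttributeError:
        encs = threadlocal.encs = {}
    try:
        enc = encs[used_model]
    except KeyError:
        try:
            enc = tiktoken.encoding_for_model(used_model)
        except KeyError:
            # unrecognized model name: fall back to a default encoding
            enc = tiktoken.get_encoding("cl100k_base")
        encs[used_model] = enc
    return len(enc.encode(text))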
92 changes: 58 additions & 34 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
@@ -45,7 +45,7 @@ pdftotext = "^2.2.2"
 "pdfminer.six" = "^20221105"
 google-api-python-client = "^2.80.0"
 oauth2client = "^4.1.3"
-tiktoken = "^0.3.2"
+tiktoken = "^0.7.0"
 google-cloud-translate = "^3.12.0"
 google-cloud-speech = "^2.21.0"
 yt-dlp = "^2023.3.4"
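This version bump is load-bearing: GPT-4o uses the `o200k_base` encoding, which tiktoken only ships from 0.7.0 onward, so `encoding_for_model("gpt-4o")` cannot resolve on the old pin. A quick sanity check, assuming tiktoken >= 0.7.0 is installed:

import tiktoken

enc = tiktoken.encoding_for_model("gpt-4o")
print(enc.name)  # o200k_base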
3 changes: 2 additions & 1 deletion recipes/DocSummary.py
@@ -216,7 +216,8 @@ def _map_reduce(request: "DocSummaryPage.RequestModel", full_text: str, state: d
 
     safety_buffer = 100
     prompt_token_count = (
-        calc_gpt_tokens(task_instructions + merge_instructions) + safety_buffer
+        calc_gpt_tokens(task_instructions + merge_instructions, model.name)
+        + safety_buffer
     )
 
     # to merge 2 outputs, we need to have at least 1/3 of the max tokens available
7 changes: 4 additions & 3 deletions recipes/SEOSummary.py
@@ -368,10 +368,11 @@ def _gen_final_prompt(
         ]
     )
 
+    used_model = LargeLanguageModels[request.selected_model]
     max_allowed_tokens = (
-        LargeLanguageModels[request.selected_model].context_window
+        used_model.context_window
         - request.max_tokens
-        - calc_gpt_tokens(end_input_prompt)
+        - calc_gpt_tokens(end_input_prompt, used_model.name)
     )
 
     state["final_prompt"] = request.task_instructions.strip() + "\n\n"
@@ -399,7 +400,7 @@ def _gen_final_prompt(
 
     # used too many tokens, abort!
     if (
-        calc_gpt_tokens(state["final_prompt"] + next_prompt_part)
+        calc_gpt_tokens(state["final_prompt"] + next_prompt_part, used_model.name)
         > max_allowed_tokens
     ):
         continue
10 changes: 6 additions & 4 deletions recipes/VideoBots.py
@@ -835,7 +835,7 @@ def run_v2(
         query_msgs = request.messages + [
             format_chat_entry(role=CHATML_ROLE_USER, content=user_input)
         ]
-        clip_idx = convo_window_clipper(query_msgs, model.context_window // 2)
+        clip_idx = convo_window_clipper(query_msgs, model.context_window // 2, model.name)
         query_msgs = query_msgs[clip_idx:]
 
         chat_history = messages_as_prompt(query_msgs)
@@ -911,13 +911,14 @@ def run_v2(
         # truncate the history to fit the model's max tokens
         max_history_tokens = (
             model.context_window
-            - calc_gpt_tokens(filter(None, [system_prompt, user_input]))
+            - calc_gpt_tokens(filter(None, [system_prompt, user_input]), model.name)
             - request.max_tokens
             - SAFETY_BUFFER
         )
         clip_idx = convo_window_clipper(
             request.messages,
             max_history_tokens,
+            model.name,
         )
         history_prompt = request.messages[clip_idx:]
         response.final_prompt = list(
@@ -926,7 +927,7 @@ def run_v2(
 
         # ensure input script is not too big
         max_allowed_tokens = model.context_window - calc_gpt_tokens(
-            response.final_prompt
+            response.final_prompt, model.name
         )
         max_allowed_tokens = min(max_allowed_tokens, request.max_tokens)
         if max_allowed_tokens < 0:
@@ -1565,9 +1566,10 @@ def convo_window_clipper(
     max_tokens,
+    used_model="gpt-4",
     *,
     step=2,
 ):
     for i in range(len(window) - 2, -1, -step):
-        if calc_gpt_tokens(window[i:]) > max_tokens:
+        if calc_gpt_tokens(window[i:], used_model) > max_tokens:
             return i + step
     return 0

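Note that `used_model` must be accepted before the `*` marker (as shown above), since both call sites in this file pass it as a third positional argument; placing it after `step` would make it keyword-only and raise a TypeError. For context, `convo_window_clipper` walks backward from the end of the conversation two entries at a time (keeping user/assistant pairs together); as soon as the tail `window[i:]` exceeds `max_tokens` it returns `i + step`, the most recent slice that still fit, and returns 0 if the whole history fits. A hypothetical usage sketch:

# hypothetical history; clip to a 4096-token budget with gpt-4o's tokenizer
msgs = [
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello! How can I help?"},
    {"role": "user", "content": "Summarize this article..."},
    {"role": "assistant", "content": "Sure, here is a summary..."},
]
clip_idx = convo_window_clipper(msgs, 4096, "gpt-4o")
msgs = msgs[clip_idx:]  # drops the oldest pairs only once over budget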
27 changes: 27 additions & 0 deletions scripts/init_llm_pricing.py
@@ -5,6 +5,33 @@
 
 
 def run():
+    # GPT-4o
+
+    ModelPricing.objects.get_or_create(
+        model_id="gpt-4o",
+        sku=ModelSku.llm_prompt,
+        defaults=dict(
+            model_name=LargeLanguageModels.gpt_4_o.name,
+            unit_cost=0.005,
+            unit_quantity=1000,
+            category=category,
+            provider=ModelProvider.openai,
+            pricing_url="https://openai.com/api/pricing/",
+        ),
+    )
+    ModelPricing.objects.get_or_create(
+        model_id="gpt-4o",
+        sku=ModelSku.llm_completion,
+        defaults=dict(
+            model_name=LargeLanguageModels.gpt_4_o.name,
+            unit_cost=0.015,
+            unit_quantity=1000,
+            category=category,
+            provider=ModelProvider.openai,
+            pricing_url="https://openai.com/api/pricing/",
+        ),
+    )
+
     # GPT-4-Turbo
 
     for model in ["gpt-4-0125-preview", "gpt-4-1106-preview"]:
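With these two rows, a GPT-4o call is billed at $0.005 per 1,000 prompt tokens and $0.015 per 1,000 completion tokens, matching the OpenAI pricing page linked in `pricing_url`. A worked example with hypothetical token counts:

# hypothetical usage: 1,200 prompt tokens + 300 completion tokens
prompt_cost = 1200 / 1000 * 0.005      # $0.0060
completion_cost = 300 / 1000 * 0.015   # $0.0045
total = prompt_cost + completion_cost  # $0.0105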