From 715ea58a0c87192df8529f7243213f01e5d2c143 Mon Sep 17 00:00:00 2001
From: Jivesh Kalra
Date: Fri, 14 Jun 2024 09:48:46 +0530
Subject: [PATCH 1/3] Update groq_client.py

---
 src/llm/groq_client.py | 40 +++++++++++++++++++++++++++-------------
 1 file changed, 27 insertions(+), 13 deletions(-)

diff --git a/src/llm/groq_client.py b/src/llm/groq_client.py
index 828c9cc6..fdfc5e77 100644
--- a/src/llm/groq_client.py
+++ b/src/llm/groq_client.py
@@ -1,5 +1,5 @@
 from groq import Groq as _Groq
-
+import time
 from src.config import Config
 
 
@@ -9,16 +9,30 @@ def __init__(self):
         api_key = config.get_groq_api_key()
         self.client = _Groq(api_key=api_key)
 
-    def inference(self, model_id: str, prompt: str) -> str:
-        chat_completion = self.client.chat.completions.create(
-            messages=[
-                {
-                    "role": "user",
-                    "content": prompt.strip(),
-                }
-            ],
-            model=model_id,
-            temperature=0
-        )
+    def inference(self, model_id: str, prompt: str,retries:int=0) -> str:
+        try:
+            chat_completion = self.client.chat.completions.create(
+                messages=[
+                    {
+                        "role": "user",
+                        "content": prompt.strip(),
+                    }
+                ],
+                model=model_id,
+                temperature=0,
+            )
+            return chat_completion.choices[0].message.content
+        except Exception as e:
+            print(prompt)
+            print(e.response.headers)
+            if e.response.status_code == 429:
+                if retries <= 10:
+                    retry_after = e.response.headers['retry-after']
+                    print(f"Rate limit exceeded, waiting for {int(retry_after)+1} seconds, then retrying...")
+                    time.sleep(int(retry_after)+1)
 
-        return chat_completion.choices[0].message.content
+                    return self.inference(model_id, prompt,retries+1)
+                else:
+                    raise RuntimeError(e.response.text)
+            else:
+                raise RuntimeError(e.response.text)
\ No newline at end of file

From 37fa0cdb45e8c7b9ab054445dca63c3eccb08447 Mon Sep 17 00:00:00 2001
From: Jivesh Kalra
Date: Fri, 14 Jun 2024 09:56:33 +0530
Subject: [PATCH 2/3] clean up

---
 src/llm/groq_client.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/llm/groq_client.py b/src/llm/groq_client.py
index fdfc5e77..33d1dada 100644
--- a/src/llm/groq_client.py
+++ b/src/llm/groq_client.py
@@ -22,9 +22,7 @@ def inference(self, model_id: str, prompt: str,retries:int=0) -> str:
                 temperature=0,
             )
             return chat_completion.choices[0].message.content
-        except Exception as e:
-            print(prompt)
-            print(e.response.headers)
+        except Exception as e:
             if e.response.status_code == 429:
                 if retries <= 10:
                     retry_after = e.response.headers['retry-after']

From 135f0c481cfc85fd0f37197506289292db4c95eb Mon Sep 17 00:00:00 2001
From: Jivesh Kalra
Date: Fri, 21 Jun 2024 14:58:39 +0530
Subject: [PATCH 3/3] Update groq_client.py

Fixed Unresolved attribute reference 'response' for class 'Exception'
---
 src/llm/groq_client.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/llm/groq_client.py b/src/llm/groq_client.py
index 33d1dada..245445c6 100644
--- a/src/llm/groq_client.py
+++ b/src/llm/groq_client.py
@@ -1,7 +1,7 @@
 from groq import Groq as _Groq
 import time
 from src.config import Config
-
+import requests.exceptions
 
 class Groq:
     def __init__(self):
@@ -22,7 +22,7 @@ def inference(self, model_id: str, prompt: str,retries:int=0) -> str:
                 temperature=0,
             )
             return chat_completion.choices[0].message.content
-        except Exception as e:
+        except requests.exceptions.HTTPError as e:
             if e.response.status_code == 429:
                 if retries <= 10:
                     retry_after = e.response.headers['retry-after']