BasedHardware · beastoin · Sep 23, 2024 · Sep 23, 2024 · Sep 23, 2024 · github-actions
diff --git a/backend/routers/transcribe.py b/backend/routers/transcribe.py
@@ -218,7 +218,9 @@ def stream_audio(audio_buffer):
                 )
 
                 print(f'deepgram-obns3: send_initial_file_path > deepgram_socket {deepgram_socket}')
-                await send_initial_file_path(file_path, deepgram_socket)
+                async def deepgram_socket_send(data):
+                    return deepgram_socket.send(data)
+                await send_initial_file_path(file_path, deepgram_socket_send)
         # SONIOX
         elif stt_service == STTService.soniox:
             soniox_socket = await process_audio_soniox(
@@ -231,7 +233,7 @@ def stream_audio(audio_buffer):
                 stream_transcript, speech_profile_stream_id, sample_rate, language, preseconds=duration
             )
             if duration:
-                await send_initial_file_path(file_path, speechmatics_socket)
+                await send_initial_file_path(file_path, speechmatics_socket.send)
                 print('speech_profile speechmatics duration', duration)
 
     except Exception as e:

diff --git a/backend/utils/stt/streaming.py b/backend/utils/stt/streaming.py
@@ -4,7 +4,7 @@
 from typing import List
 
 import websockets
-from deepgram import DeepgramClient, DeepgramClientOptions, LiveTranscriptionEvents
+from deepgram import DeepgramClient, DeepgramClientOptions, LiveTranscriptionEvents, ListenWebSocketClient
 from deepgram.clients.live.v1 import LiveOptions
 
 import database.notifications as notification_db
@@ -61,7 +61,7 @@
 #     return segments
 
 
-async def send_initial_file_path(file_path: str, transcript_socket):
+async def send_initial_file_path(file_path: str, transcript_socket_async_send):
     print('send_initial_file_path')
     start = time.time()
     # Reading and sending in chunks
@@ -71,7 +71,7 @@ async def send_initial_file_path(file_path: str, transcript_socket):
             if not chunk:
                 break
             # print('Uploading', len(chunk))
-            await transcript_socket.send(bytes(chunk))
+            await transcript_socket_async_send(bytes(chunk))
             await asyncio.sleep(0.0001)  # if it takes too long to transcribe
 
     print('send_initial_file_path', time.time() - start)
@@ -154,9 +154,39 @@ def process_segments(uid: str, segments: list[dict]):
 def connect_to_deepgram(on_message, on_error, language: str, sample_rate: int, channels: int):
     # 'wss://api.deepgram.com/v1/listen?encoding=linear16&sample_rate=8000&language=$recordingsLanguage&model=nova-2-general&no_delay=true&endpointing=100&interim_results=false&smart_format=true&diarize=true'
     try:
-        dg_connection = deepgram.listen.live.v("1")
+        dg_connection = deepgram.listen.websocket.v("1")
         dg_connection.on(LiveTranscriptionEvents.Transcript, on_message)
         dg_connection.on(LiveTranscriptionEvents.Error, on_error)
+
+        def on_open(self, open, **kwargs):
+            print("Connection Open")
+
+        def on_metadata(self, metadata, **kwargs):
+            print(f"Metadata: {metadata}")
+
+        def on_speech_started(self, speech_started, **kwargs):
+            print("Speech Started")
+
+        def on_utterance_end(self, utterance_end, **kwargs):
+            print("Utterance End")
+            global is_finals
+            if len(is_finals) > 0:
+                utterance = " ".join(is_finals)
+                print(f"Utterance End: {utterance}")
+                is_finals = []
+
+        def on_close(self, close, **kwargs):
+            print("Connection Closed")
+
+        def on_unhandled(self, unhandled, **kwargs):
+            print(f"Unhandled Websocket Message: {unhandled}")
+
+        dg_connection.on(LiveTranscriptionEvents.Open, on_open)
+        dg_connection.on(LiveTranscriptionEvents.Metadata, on_metadata)
+        dg_connection.on(LiveTranscriptionEvents.SpeechStarted, on_speech_started)
+        dg_connection.on(LiveTranscriptionEvents.UtteranceEnd, on_utterance_end)
+        dg_connection.on(LiveTranscriptionEvents.Close, on_close)
+        dg_connection.on(LiveTranscriptionEvents.Unhandled, on_unhandled)
         options = LiveOptions(
             punctuate=True,
             no_delay=True,