Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use socket send async func to wrap the various socket send function w… #911

Merged
merged 2 commits into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions backend/routers/transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,9 @@ def stream_audio(audio_buffer):
)

print(f'deepgram-obns3: send_initial_file_path > deepgram_socket {deepgram_socket}')
await send_initial_file_path(file_path, deepgram_socket)
async def deepgram_socket_send(data):
return deepgram_socket.send(data)
await send_initial_file_path(file_path, deepgram_socket_send)
Comment on lines +221 to +223

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Image description Entelligence.AI

Wrapping deepgram_socket.send in a new deepgram_socket_send function, instead of passing deepgram_socket directly, seems unnecessary. The send_initial_file_path function now expects a send function rather than a socket object, which could confuse future developers. If there is a specific reason for this change, consider adding a comment to explain it.

- async def deepgram_socket_send(data):
-     return deepgram_socket.send(data)
- await send_initial_file_path(file_path, deepgram_socket_send)
+ await send_initial_file_path(file_path, deepgram_socket)

# SONIOX
elif stt_service == STTService.soniox:
soniox_socket = await process_audio_soniox(
Expand All @@ -231,7 +233,7 @@ def stream_audio(audio_buffer):
stream_transcript, speech_profile_stream_id, sample_rate, language, preseconds=duration
)
if duration:
await send_initial_file_path(file_path, speechmatics_socket)
await send_initial_file_path(file_path, speechmatics_socket.send)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Image description Entelligence.AI

As with the previous comment, passing the socket's send method (speechmatics_socket.send) to send_initial_file_path, instead of the speechmatics_socket object itself, could be confusing. If there is a specific reason for this change, consider adding a comment to explain it.

- await send_initial_file_path(file_path, speechmatics_socket.send)
+ await send_initial_file_path(file_path, speechmatics_socket)

print('speech_profile speechmatics duration', duration)

except Exception as e:
Expand Down
38 changes: 34 additions & 4 deletions backend/utils/stt/streaming.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from typing import List

import websockets
from deepgram import DeepgramClient, DeepgramClientOptions, LiveTranscriptionEvents
from deepgram import DeepgramClient, DeepgramClientOptions, LiveTranscriptionEvents, ListenWebSocketClient

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Image description Entelligence.AI

The import statement has been modified to include ListenWebSocketClient, but it is not used anywhere in the provided code. If it is indeed unused, it should be removed to keep the code clean and maintainable.

- from deepgram import DeepgramClient, DeepgramClientOptions, LiveTranscriptionEvents, ListenWebSocketClient
+ from deepgram import DeepgramClient, DeepgramClientOptions, LiveTranscriptionEvents

from deepgram.clients.live.v1 import LiveOptions

import database.notifications as notification_db
Expand Down Expand Up @@ -61,7 +61,7 @@
# return segments


async def send_initial_file_path(file_path: str, transcript_socket):
async def send_initial_file_path(file_path: str, transcript_socket_async_send):
print('send_initial_file_path')
start = time.time()
# Reading and sending in chunks
Expand All @@ -71,7 +71,7 @@ async def send_initial_file_path(file_path: str, transcript_socket):
if not chunk:
break
# print('Uploading', len(chunk))
await transcript_socket.send(bytes(chunk))
await transcript_socket_async_send(bytes(chunk))
await asyncio.sleep(0.0001) # if it takes too long to transcribe

print('send_initial_file_path', time.time() - start)
Expand Down Expand Up @@ -154,9 +154,39 @@ def process_segments(uid: str, segments: list[dict]):
def connect_to_deepgram(on_message, on_error, language: str, sample_rate: int, channels: int):
# 'wss://api.deepgram.com/v1/listen?encoding=linear16&sample_rate=8000&language=$recordingsLanguage&model=nova-2-general&no_delay=true&endpointing=100&interim_results=false&smart_format=true&diarize=true'
try:
dg_connection = deepgram.listen.live.v("1")
dg_connection = deepgram.listen.websocket.v("1")
dg_connection.on(LiveTranscriptionEvents.Transcript, on_message)
dg_connection.on(LiveTranscriptionEvents.Error, on_error)

def on_open(self, open, **kwargs):
print("Connection Open")

def on_metadata(self, metadata, **kwargs):
print(f"Metadata: {metadata}")

def on_speech_started(self, speech_started, **kwargs):
print("Speech Started")

def on_utterance_end(self, utterance_end, **kwargs):
print("Utterance End")
global is_finals
if len(is_finals) > 0:
utterance = " ".join(is_finals)
print(f"Utterance End: {utterance}")
is_finals = []

def on_close(self, close, **kwargs):
print("Connection Closed")

def on_unhandled(self, unhandled, **kwargs):
print(f"Unhandled Websocket Message: {unhandled}")

dg_connection.on(LiveTranscriptionEvents.Open, on_open)
dg_connection.on(LiveTranscriptionEvents.Metadata, on_metadata)
dg_connection.on(LiveTranscriptionEvents.SpeechStarted, on_speech_started)
dg_connection.on(LiveTranscriptionEvents.UtteranceEnd, on_utterance_end)
dg_connection.on(LiveTranscriptionEvents.Close, on_close)
dg_connection.on(LiveTranscriptionEvents.Unhandled, on_unhandled)
Comment on lines +157 to +189

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Image description Entelligence.AI

The event handling functions on_open, on_metadata, on_speech_started, on_utterance_end, on_close, on_unhandled are defined within the connect_to_deepgram function but they don't have access to the self parameter as they are not part of a class. This will cause a runtime error when these functions are called.

Also, the variable is_finals is referenced in the on_utterance_end function but it is not defined anywhere in the provided code. This will also cause a runtime error.

Here's a possible fix: remove the self parameter from the function definitions and define is_finals before using it.

-        def on_open(self, open, **kwargs):
+        def on_open(open, **kwargs):
             print("Connection Open")

-        def on_metadata(self, metadata, **kwargs):
+        def on_metadata(metadata, **kwargs):
             print(f"Metadata: {metadata}")

-        def on_speech_started(self, speech_started, **kwargs):
+        def on_speech_started(speech_started, **kwargs):
             print("Speech Started")

-        def on_utterance_end(self, utterance_end, **kwargs):
+        def on_utterance_end(utterance_end, **kwargs):
             print("Utterance End")
+            is_finals = []
             if len(is_finals) > 0:
                 utterance = " ".join(is_finals)
                 print(f"Utterance End: {utterance}")
                 is_finals = []

-        def on_close(self, close, **kwargs):
+        def on_close(close, **kwargs):
             print("Connection Closed")

-        def on_unhandled(self, unhandled, **kwargs):
+        def on_unhandled(unhandled, **kwargs):
             print(f"Unhandled Websocket Message: {unhandled}")

options = LiveOptions(
punctuate=True,
no_delay=True,
Expand Down
Loading