-
Notifications
You must be signed in to change notification settings - Fork 471
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Align start seconds for transcript segment #906
Changes from 7 commits
2229df9
60376d9
8010780
a306ce5
978957d
bd95410
8465de6
60b33c4
35027e2
461d062
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
import threading | ||
import math | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
import asyncio | ||
import time | ||
from typing import List | ||
|
@@ -139,10 +140,27 @@ def stream_transcript(segments, stream_id): | |
nonlocal processing_memory | ||
nonlocal processing_memory_synced | ||
nonlocal memory_transcript_segements | ||
nonlocal segment_start | ||
nonlocal segment_end | ||
|
||
if not segments or len(segments) == 0: | ||
return | ||
|
||
# Align the start, end segment | ||
if not segment_start: | ||
start = segments[0]["start"] | ||
segment_start = start | ||
|
||
# end | ||
end = segments[-1]["end"] | ||
if not segment_end or segment_end < end: | ||
segment_end = end | ||
|
||
for i, segment in enumerate(segments): | ||
segment["start"] -= segment_start | ||
segment["end"] -= segment_start | ||
segments[i] = segment | ||
|
||
Comment on lines
+143
to
+163
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The logic for aligning the start and end of segments seems correct. However, there is no error handling or validation for the # Align the start, end segment
if not segment_start:
start = segments[0].get("start")
if start is None:
raise ValueError("Each segment should have a 'start' field.")
segment_start = start
# end
end = segments[-1].get("end")
if end is None:
raise ValueError("Each segment should have an 'end' field.")
if not segment_end or segment_end < end:
segment_end = end
for i, segment in enumerate(segments):
if "start" not in segment or "end" not in segment:
raise ValueError("Each segment should have 'start' and 'end' fields.")
segment["start"] -= segment_start
segment["end"] -= segment_start
segments[i] = segment |
||
asyncio.run_coroutine_threadsafe(websocket.send_json(segments), loop) | ||
threading.Thread(target=process_segments, args=(uid, segments)).start() | ||
|
||
|
@@ -178,6 +196,9 @@ def stream_audio(audio_buffer): | |
websocket_active = True | ||
websocket_close_code = 1001 # Going Away, don't close with good from backend | ||
timer_start = None | ||
segment_start = None | ||
segment_end = None | ||
audio_frames_per_sec = 100 | ||
# audio_buffer = None | ||
duration = 0 | ||
try: | ||
|
@@ -239,7 +260,8 @@ async def receive_audio(dg_socket1, dg_socket2, soniox_socket, speechmatics_sock | |
# audio_file = open(path, "a") | ||
try: | ||
while websocket_active: | ||
data = await websocket.receive_bytes() | ||
raw_data = await websocket.receive_bytes() | ||
data = raw_data[:] | ||
Comment on lines
+263
to
+264
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
# audio_buffer.extend(data) | ||
if codec == 'opus' and sample_rate == 16000: | ||
data = decoder.decode(bytes(data), frame_size=160) | ||
|
@@ -262,7 +284,7 @@ async def receive_audio(dg_socket1, dg_socket2, soniox_socket, speechmatics_sock | |
dg_socket2.send(data) | ||
|
||
# stream | ||
stream_audio(data) | ||
stream_audio(raw_data) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
|
||
# audio_buffer = bytearray() | ||
|
||
|
@@ -331,10 +353,11 @@ async def _create_processing_memory(): | |
last_processing_memory_data = processing_memories_db.get_last(uid) | ||
if last_processing_memory_data: | ||
last_processing_memory = ProcessingMemory(**last_processing_memory_data) | ||
segment_end = 0 | ||
last_segment_end = 0 | ||
for segment in last_processing_memory.transcript_segments: | ||
segment_end = max(segment_end, segment.end) | ||
if last_processing_memory.timer_start + segment_end + min_seconds_limit > time.time(): | ||
last_segment_end = max(last_segment_end, segment.end) | ||
timer_segment_start = last_processing_memory.timer_segment_start if last_processing_memory.timer_segment_start else last_processing_memory.timer_start | ||
if timer_segment_start + last_segment_end + min_seconds_limit > time.time(): | ||
Comment on lines
+356
to
+360
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The variable name - last_segment_end = 0
+ max_segment_end = 0
- for segment in last_processing_memory.transcript_segments:
- last_segment_end = max(last_segment_end, segment.end)
+ for segment in last_processing_memory.transcript_segments:
+ max_segment_end = max(max_segment_end, segment.end) |
||
processing_memory = last_processing_memory | ||
|
||
# Or create new | ||
|
@@ -343,6 +366,7 @@ async def _create_processing_memory(): | |
id=str(uuid.uuid4()), | ||
created_at=datetime.now(timezone.utc), | ||
timer_start=timer_start, | ||
timer_segment_start=timer_start+segment_start, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
language=language, | ||
) | ||
|
||
|
@@ -375,11 +399,22 @@ async def _post_process_memory(memory: Memory): | |
nonlocal processing_memory | ||
nonlocal processing_audio_frames | ||
nonlocal processing_audio_frame_synced | ||
nonlocal segment_start | ||
nonlocal segment_end | ||
|
||
# Create wav | ||
processing_audio_frame_synced = len(processing_audio_frames) | ||
|
||
# Remove audio frames [start, end] | ||
left = 0 | ||
if segment_start: | ||
left = max(left, math.floor(segment_start) * audio_frames_per_sec) | ||
right = processing_audio_frame_synced | ||
if segment_end: | ||
right = min(math.ceil(segment_end) * audio_frames_per_sec, right) | ||
|
||
Comment on lines
+402
to
+415
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The logic for creating a WAV file seems correct. However, there is no error handling or validation for the if segment_start:
+ if not isinstance(segment_start, (int, float)):
+ raise TypeError("`segment_start` should be a number.")
left = max(left, math.floor(segment_start) * audio_frames_per_sec)
if segment_end:
+ if not isinstance(segment_end, (int, float)):
+ raise TypeError("`segment_end` should be a number.")
right = min(math.ceil(segment_end) * audio_frames_per_sec, right) |
||
file_path = f"_temp/{memory.id}_{uuid.uuid4()}_be" | ||
create_wav_from_bytes(file_path=file_path, frames=processing_audio_frames[:processing_audio_frame_synced], | ||
create_wav_from_bytes(file_path=file_path, frames=processing_audio_frames[left:right], | ||
Comment on lines
+402
to
+417
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The logic for creating a WAV file from a subset of audio frames based on segment start and end times seems correct. However, there is no error handling or validation for the # Remove audio frames [start, end]
left = 0
if segment_start:
left = max(left, math.floor(segment_start) * audio_frames_per_sec)
right = processing_audio_frame_synced
if segment_end:
right = min(math.ceil(segment_end) * audio_frames_per_sec, right)
file_path = f"_temp/{memory.id}_{uuid.uuid4()}_be"
create_wav_from_bytes(file_path=file_path, frames=processing_audio_frames[left:right],
frame_rate=sample_rate, channels=channels, codec=codec, ) |
||
frame_rate=sample_rate, channels=channels, codec=codec, ) | ||
|
||
# Try merge new audio with the previous | ||
|
@@ -525,6 +560,8 @@ async def _try_flush_new_memory_with_lock(time_validate: bool = True): | |
async def _try_flush_new_memory(time_validate: bool = True): | ||
nonlocal memory_transcript_segements | ||
nonlocal timer_start | ||
nonlocal segment_start | ||
nonlocal segment_end | ||
nonlocal processing_memory | ||
nonlocal processing_memory_synced | ||
nonlocal processing_audio_frames | ||
|
@@ -535,13 +572,8 @@ async def _try_flush_new_memory(time_validate: bool = True): | |
return | ||
|
||
# Validate last segment | ||
last_segment = None | ||
if len(memory_transcript_segements) > 0: | ||
last_segment = memory_transcript_segements[-1] | ||
if not last_segment: | ||
if not segment_end: | ||
print("Not last segment or last segment invalid") | ||
Comment on lines
+580
to
581
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The validation for if not segment_end:
print("Not last segment or last segment invalid")
+ return |
||
if last_segment: | ||
print(f"{last_segment.dict()}") | ||
return | ||
|
||
# First chunk, create processing memory | ||
|
@@ -552,11 +584,11 @@ async def _try_flush_new_memory(time_validate: bool = True): | |
|
||
# Validate transcript | ||
# Longer 120s | ||
segment_end = last_segment.end | ||
now = time.time() | ||
should_create_memory_time = True | ||
timer_segment_start = timer_start + segment_start | ||
if time_validate: | ||
should_create_memory_time = timer_start + segment_end + min_seconds_limit < now | ||
should_create_memory_time = timer_segment_start + segment_end + min_seconds_limit < now | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The logic for validating the transcript seems correct. However, there is no error handling or validation for the data within the segments. It would be beneficial to add checks to ensure that the "start" and "end" keys exist in each segment and that their values are numbers. This will prevent potential KeyError or TypeError exceptions. Also, the check Here's an example of how you could adjust this section: now = time.time()
should_create_memory_time = True
timer_segment_start = timer_start + segment_start
if time_validate is not None:
should_create_memory_time = timer_segment_start + segment_end + min_seconds_limit < now |
||
|
||
# 1 words at least | ||
should_create_memory_time_words = min_words_limit == 0 | ||
|
@@ -570,7 +602,7 @@ async def _try_flush_new_memory(time_validate: bool = True): | |
|
||
should_create_memory = should_create_memory_time and should_create_memory_time_words | ||
print( | ||
f"Should create memory {should_create_memory} - {timer_start} {segment_end} {min_seconds_limit} {now} - {time_validate}, session {session_id}") | ||
f"Should create memory {should_create_memory} - {timer_segment_start} {segment_end} {min_seconds_limit} {now} - {time_validate}, session {session_id}") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
if should_create_memory: | ||
memory = await _create_memory() | ||
if not memory: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,11 +23,11 @@ async def create_memory_by_processing_memory(uid: str, processing_memory_id: str | |
if not transcript_segments or len(transcript_segments) == 0: | ||
print("Transcript segments is invalid") | ||
return | ||
timer_start = processing_memory.timer_start | ||
timer_segment_start = processing_memory.timer_segment_start if processing_memory.timer_segment_start else processing_memory.timer_start | ||
segment_end = transcript_segments[-1].end | ||
new_memory = CreateMemory( | ||
started_at=datetime.fromtimestamp(timer_start, timezone.utc), | ||
finished_at=datetime.fromtimestamp(timer_start + segment_end, timezone.utc), | ||
started_at=datetime.fromtimestamp(timer_segment_start, timezone.utc), | ||
finished_at=datetime.fromtimestamp(timer_segment_start + segment_end, timezone.utc), | ||
Comment on lines
+26
to
+30
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The changes made here are logical and seem to be in line with the PR's goal of aligning start seconds for transcript segments. However, there is a potential issue with error handling. If - timer_segment_start = processing_memory.timer_segment_start if processing_memory.timer_segment_start else processing_memory.timer_start
+ timer_segment_start = processing_memory.timer_segment_start if processing_memory.timer_segment_start else processing_memory.timer_start
+ if timer_segment_start is None:
+ print("Timer segment start is invalid")
+ return This way, we can avoid unexpected exceptions and provide a more informative error message. |
||
language=processing_memory.language, | ||
transcript_segments=transcript_segments, | ||
) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Entelligence.AI
The addition of
timer_segment_start
as an optional field in theProcessingMemory
class seems fine. However, it would be beneficial to add a comment explaining what this new field represents and how it differs fromtimer_start
. This will help other developers understand its purpose more easily.