Skip to content

Commit

Permalink
Use transcript timer segment start instead of timer start (audio) for started at, finished at memory
Browse files Browse the repository at this point in the history
  • Loading branch information
beastoin committed Sep 22, 2024
1 parent 4d9e041 commit b796686
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 16 deletions.
1 change: 1 addition & 0 deletions backend/models/processing_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class ProcessingMemory(BaseModel):
audio_url: Optional[str] = None
created_at: datetime
timer_start: float
timer_segment_start: Optional[float] = None
timer_starts: List[float] = []
language: Optional[str] = None # applies only to Friend # TODO: once released migrate db to default 'en'
transcript_segments: List[TranscriptSegment] = []
Expand Down
23 changes: 11 additions & 12 deletions backend/routers/transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,10 +352,11 @@ async def _create_processing_memory():
last_processing_memory_data = processing_memories_db.get_last(uid)
if last_processing_memory_data:
last_processing_memory = ProcessingMemory(**last_processing_memory_data)
segment_end = 0
last_segment_end = 0
for segment in last_processing_memory.transcript_segments:
segment_end = max(segment_end, segment.end)
if last_processing_memory.timer_start + segment_end + min_seconds_limit > time.time():
last_segment_end = max(last_segment_end, segment.end)
timer_segment_start = last_processing_memory.timer_segment_start if last_processing_memory.timer_segment_start else last_processing_memory.timer_start
if timer_segment_start + last_segment_end + min_seconds_limit > time.time():
processing_memory = last_processing_memory

# Or create new
Expand All @@ -364,6 +365,7 @@ async def _create_processing_memory():
id=str(uuid.uuid4()),
created_at=datetime.now(timezone.utc),
timer_start=timer_start,
timer_segment_start=timer_start+segment_start,
language=language,
)

Expand Down Expand Up @@ -557,6 +559,8 @@ async def _try_flush_new_memory_with_lock(time_validate: bool = True):
async def _try_flush_new_memory(time_validate: bool = True):
nonlocal memory_transcript_segements
nonlocal timer_start
nonlocal segment_start
nonlocal segment_end
nonlocal processing_memory
nonlocal processing_memory_synced
nonlocal processing_audio_frames
Expand All @@ -567,13 +571,8 @@ async def _try_flush_new_memory(time_validate: bool = True):
return

# Validate last segment
last_segment = None
if len(memory_transcript_segements) > 0:
last_segment = memory_transcript_segements[-1]
if not last_segment:
if not segment_end:
print("Not last segment or last segment invalid")
if last_segment:
print(f"{last_segment.dict()}")
return

# First chunk, create processing memory
Expand All @@ -584,11 +583,11 @@ async def _try_flush_new_memory(time_validate: bool = True):

# Validate transcript
# Longer 120s
segment_end = last_segment.end
now = time.time()
should_create_memory_time = True
if time_validate:
should_create_memory_time = timer_start + segment_end + min_seconds_limit < now
timer_segment_start = timer_start + segment_start
should_create_memory_time = timer_segment_start + segment_end + min_seconds_limit < now

# 1 words at least
should_create_memory_time_words = min_words_limit == 0
Expand All @@ -602,7 +601,7 @@ async def _try_flush_new_memory(time_validate: bool = True):

should_create_memory = should_create_memory_time and should_create_memory_time_words
print(
f"Should create memory {should_create_memory} - {timer_start} {segment_end} {min_seconds_limit} {now} - {time_validate}, session {session_id}")
f"Should create memory {should_create_memory} - {timer_segment_start} {segment_end} {min_seconds_limit} {now} - {time_validate}, session {session_id}")
if should_create_memory:
memory = await _create_memory()
if not memory:
Expand Down
7 changes: 3 additions & 4 deletions backend/utils/processing_memories.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,11 @@ async def create_memory_by_processing_memory(uid: str, processing_memory_id: str
if not transcript_segments or len(transcript_segments) == 0:
print("Transcript segments is invalid")
return
timer_start = processing_memory.timer_start
segment_start = transcript_segments[0].start
timer_segment_start = processing_memory.timer_segment_start
segment_end = transcript_segments[-1].end
new_memory = CreateMemory(
started_at=datetime.fromtimestamp(timer_start + segment_start, timezone.utc),
finished_at=datetime.fromtimestamp(timer_start + segment_start + segment_end, timezone.utc),
started_at=datetime.fromtimestamp(timer_segment_start, timezone.utc),
finished_at=datetime.fromtimestamp(timer_segment_start + segment_end, timezone.utc),
language=processing_memory.language,
transcript_segments=transcript_segments,
)
Expand Down

0 comments on commit b796686

Please sign in to comment.