diff --git a/backend/routers/transcribe.py b/backend/routers/transcribe.py
index 49a9463ad..26a9267f3 100644
--- a/backend/routers/transcribe.py
+++ b/backend/routers/transcribe.py
@@ -428,7 +428,8 @@ async def _post_process_memory(memory: Memory):
 
         # merge
         merge_file_path = f"_temp/{memory.id}_{uuid.uuid4()}_be"
-        merge_wav_files(merge_file_path, [previous_file_path, file_path])
+        nearest_timer_start = processing_memory.timer_starts[-2]
+        merge_wav_files(merge_file_path, [previous_file_path, file_path], [math.ceil(timer_start-nearest_timer_start), 0])
 
         # clean
         os.remove(previous_file_path)
diff --git a/backend/utils/audio.py b/backend/utils/audio.py
index 069a2ebdd..bf4663c73 100644
--- a/backend/utils/audio.py
+++ b/backend/utils/audio.py
@@ -3,14 +3,22 @@ from pyogg import OpusDecoder
 from pydub import AudioSegment
 
 
-def merge_wav_files(dest_file_path: str, source_files: [str]):
+def merge_wav_files(dest_file_path: str, source_files: [str], silent_seconds: [int] = None):
+    """Concatenate WAV files into one file, adding silence after each source.
+
+    silent_seconds[i] is the gap, in seconds, appended after source_files[i].
+    Missing entries, or omitting the argument entirely, mean no gap.
+    """
     if len(source_files) == 0 or not dest_file_path:
         return
 
     combined_sounds = AudioSegment.empty()
-    for file_path in source_files:
+    silent_seconds = silent_seconds or []
+    for i, file_path in enumerate(source_files):
         sound = AudioSegment.from_wav(file_path)
-        combined_sounds = combined_sounds + sound
+        silent_sec = silent_seconds[i] if i < len(silent_seconds) else 0
+        # pydub measures durations in milliseconds, so convert from seconds.
+        combined_sounds = combined_sounds + sound + AudioSegment.silent(duration=silent_sec * 1000)
     combined_sounds.export(dest_file_path, format="wav")
 
 