diff --git a/backend/routers/transcribe.py b/backend/routers/transcribe.py
index 49a9463ad..26a9267f3 100644
--- a/backend/routers/transcribe.py
+++ b/backend/routers/transcribe.py
@@ -428,7 +428,8 @@ async def _post_process_memory(memory: Memory):
 
                     # merge
                     merge_file_path = f"_temp/{memory.id}_{uuid.uuid4()}_be"
-                    merge_wav_files(merge_file_path, [previous_file_path, file_path])
+                    nearest_timer_start = processing_memory.timer_starts[-2]
+                    merge_wav_files(merge_file_path, [previous_file_path, file_path], [math.ceil(timer_start-nearest_timer_start), 0])
 
                     # clean
                     os.remove(previous_file_path)
diff --git a/backend/utils/audio.py b/backend/utils/audio.py
index 069a2ebdd..bf4663c73 100644
--- a/backend/utils/audio.py
+++ b/backend/utils/audio.py
@@ -3,14 +3,16 @@
 from pyogg import OpusDecoder
 from pydub import AudioSegment
 
-def merge_wav_files(dest_file_path: str, source_files: [str]):
+def merge_wav_files(dest_file_path: str, source_files: [str], silent_seconds: [int]):
     if len(source_files) == 0 or not dest_file_path:
         return
 
     combined_sounds = AudioSegment.empty()
-    for file_path in source_files:
+    for i in range(len(source_files)):
+        file_path = source_files[i]
         sound = AudioSegment.from_wav(file_path)
-        combined_sounds = combined_sounds + sound
+        silent_sec = silent_seconds[i]
+        combined_sounds = combined_sounds + sound + AudioSegment.silent(duration=silent_sec)
     combined_sounds.export(dest_file_path, format="wav")