-
Notifications
You must be signed in to change notification settings - Fork 2
/
audio-to-text.py
54 lines (45 loc) · 1.55 KB
/
audio-to-text.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#Import all the packages
import os
import speech_recognition as sr
from tqdm import tqdm
from multiprocessing.dummy import Pool
# Thread pool that lets several audio parts be transcribed at the same time.
pool = Pool(processes=8)

# Raw text of the Google Cloud credentials JSON file. It is passed unchanged
# as `credentials_json` to every Google Cloud Speech recognition request.
with open("G:/AllStuff E Drive/HGS_mini_project/api-key.json") as key_file:
    GOOGLE_CLOUD_SPEECH_CREDENTIALS = key_file.read()

# Single recognizer instance used by transcribe() for every audio part.
r = sr.Recognizer()

# Names of all entries in the parts/ folder, in sorted (lexicographic) order.
# The position of a name in this list is later used as the part index.
files = os.listdir('G:/AllStuff E Drive/HGS_mini_project/parts/')
files.sort()
# Folder the audio parts are listed from. transcribe() opens each part from
# this same folder so the script does not depend on the working directory.
_PARTS_DIR = 'G:/AllStuff E Drive/HGS_mini_project/parts/'


def transcribe(data):
    """Transcribe one audio part with the Google Cloud Speech API.

    Args:
        data: An ``(idx, file)`` pair. ``idx`` is the position of the part in
            the sorted file list and ``file`` is its file name inside the
            parts folder.

    Returns:
        A dict ``{"idx": idx, "text": text}``. ``text`` is an empty string
        when no speech could be recognised in the part.

    Raises:
        speech_recognition.RequestError: Propagated unchanged when the
            recognition request itself fails, so that real API problems are
            not hidden behind an empty transcript.
    """
    idx, file = data
    # Short label used only in the progress messages below.
    name = "parts/" + file
    print(name + " started")

    # Load the audio from the folder the file name was listed from.
    with sr.AudioFile(os.path.join(_PARTS_DIR, file)) as source:
        audio = r.record(source)

    # Transcribe the audio. A silent or unintelligible part must not abort
    # the whole batch (an exception here would propagate through pool.map and
    # discard every other result), so it is reported and given empty text.
    try:
        text = r.recognize_google_cloud(
            audio, credentials_json=GOOGLE_CLOUD_SPEECH_CREDENTIALS)
    except sr.UnknownValueError:
        print(name + " had no recognizable speech")
        text = ""

    print(name + " done")
    return {
        "idx": idx,
        "text": text
    }
# Duration of each audio part in seconds; part N starts at N * CHUNK_SECONDS.
CHUNK_SECONDS = 30

# Transcribe every part on the thread pool. The pool is always closed and
# joined, even when a transcription raises, so worker threads are not leaked.
try:
    all_text = pool.map(transcribe, enumerate(files))
finally:
    pool.close()
    pool.join()

# pool.map returns results in the order of its input, so the entries are
# already ordered by idx. Collect the lines in a list and join them once
# instead of growing a string with repeated concatenation.
lines = []
for t in all_text:
    total_seconds = t['idx'] * CHUNK_SECONDS
    # Split the start offset into hours, minutes and seconds, see
    # https://stackoverflow.com/questions/775049/python-time-seconds-to-hms
    m, s = divmod(total_seconds, 60)
    h, m = divmod(m, 60)
    # One line per part: "hh:mm:ss <text of that part>"
    lines.append("{:0>2d}:{:0>2d}:{:0>2d} {}\n".format(h, m, s, t['text']))
transcript = "".join(lines)

# Print the transcript result
print(transcript)

# Save the transcript to transcript.txt. UTF-8 is requested explicitly so
# non-ASCII characters in the recognised text do not depend on (or fail
# under) the platform's default encoding.
with open("transcript.txt", "w", encoding="utf-8") as f:
    f.write(transcript)