-
Notifications
You must be signed in to change notification settings - Fork 2
/
audio.py
182 lines (142 loc) · 6.41 KB
/
audio.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
import wave
import pyaudio
from spleeter.separator import Separator
from spleeter.audio import STFTBackend
import numpy as np
from tqdm import tqdm
import os
import sys
import pickle
# Integer index -> file name of one separated stem. The four names line up with
# the drums / bass / vocals / other split requested from Spleeter further down
# in this file (presumably these are the files it writes -- confirm).
SEPARATE_DICT = {
    0: 'drums.wav',
    1: 'bass.wav',
    2: 'vocals.wav',
    3: 'other.wav',
}
def open_stream(audio_file, CHUNK_MUL=1):
    """Open a WAV file for reading together with a matching PyAudio output stream.

    Args:
        audio_file: Path (or file object) accepted by ``wave.open``.
        CHUNK_MUL: Multiplier applied to the base chunk size of 1024.

    Returns:
        Tuple ``(stream, wf, CHUNK)``: the PyAudio output stream configured with
        the file's sample width, channel count and frame rate; the open
        ``wave`` reader; and the chunk size ``1024 * CHUNK_MUL``.

    Note:
        Neither the stream nor the wave reader is closed here; the caller owns
        both. The ``PyAudio`` instance itself is not returned.
    """
    frames_per_chunk = 1024 * CHUNK_MUL
    wav_reader = wave.open(audio_file, 'rb')
    player = pyaudio.PyAudio()
    # Derive the playback sample format from the file's bytes-per-sample.
    sample_format = player.get_format_from_width(wav_reader.getsampwidth())
    out_stream = player.open(
        format=sample_format,
        channels=wav_reader.getnchannels(),
        rate=wav_reader.getframerate(),
        output=True,
    )
    return out_stream, wav_reader, frames_per_chunk
def get_saved_audio(file):
    """Load previously cached audio data for ``file`` if a cache exists.

    The cache is a pickle stored next to the audio file, holding a dict with
    the keys ``'data'``, ``'chunk'`` and ``'rate'``.

    Args:
        file: Path of the audio file whose cache should be loaded.

    Returns:
        Tuple ``(audio_data, CHUNK, RATE)`` read from the cache, or ``None``
        when no cache file exists.
    """
    # Same naming rule that get_audio_data uses when writing the cache:
    # everything before the first '.' in the path, plus '.pkl'.
    audio_pkl_filename = file.split('.')[0] + '.pkl'
    if not os.path.exists(audio_pkl_filename):
        return None

    print('Saved audio data exists. Skipping preprocessing...')
    # NOTE: unpickling can execute arbitrary code. Only load cache files that
    # this program wrote itself; never point this at untrusted files.
    with open(audio_pkl_filename, "rb") as pkl_file:
        saved_data = pickle.load(pkl_file)
    return saved_data['data'], saved_data['chunk'], saved_data['rate']
def get_audio_data(file, save=True):
    """Read a WAV file chunk by chunk into a 2-D integer array.

    Args:
        file: Path of the WAV file to read.
        save: When true, pickle the result next to the audio file (path up to
            the first '.' plus '.pkl') so it can be reloaded later.

    Returns:
        Tuple ``(all_data, CHUNK, RATE)`` where ``all_data`` has shape
        ``(number_of_chunks, CHUNK)`` and dtype ``int32``, ``CHUNK`` is the
        chunk size reported by ``open_stream`` and ``RATE`` is the file's
        frame rate. If no chunk is read, ``all_data`` has shape ``(0, CHUNK)``.
    """
    stream, wf, CHUNK = open_stream(file)
    rows = []
    try:
        RATE = wf.getframerate()
        with tqdm(total=wf.getnframes()) as pbar:
            while True:
                # Read next frame
                data = wf.readframes(CHUNK)
                pbar.update(CHUNK)
                # NOTE(review): len(data) is a byte count while CHUNK is a
                # frame count, so this only stops on a very short tail.
                # Kept as-is so the produced data does not change.
                if len(data) < CHUNK:
                    break
                # NOTE(review): raw bytes are always reinterpreted as int32,
                # whatever the file's sample width / channel count -- confirm.
                samples = np.frombuffer(data, dtype=np.int32)
                # np.resize repeats the samples when fewer than CHUNK are
                # available, so every row has exactly CHUNK entries.
                rows.append(np.resize(samples, (1, CHUNK)))
    finally:
        # Neither handle is returned to the caller, so release them here.
        wf.close()
        stream.close()

    # Join once at the end instead of re-copying the whole array per chunk.
    if rows:
        all_data = np.concatenate(rows, axis=0)
    else:
        all_data = np.empty((0, CHUNK), dtype=np.int32)

    if save:
        audio_pkl_filename = file.split('.')[0] + '.pkl'
        with open(audio_pkl_filename, "wb") as pkl_file:
            pickle.dump({'data': all_data, 'chunk': CHUNK, 'rate': RATE}, pkl_file)
    return all_data, CHUNK, RATE
def fft_to_buckets(freq, PSD, buckets):
    """
    Average a chunk's power spectrum over frequency ranges.

    - freq: array of the frequency belonging to each entry of PSD
    - PSD: power spectral density of each frequency
    - buckets: list of frequencies used as range edges, e.g. [100, 1000, 5000] Hz.
      Each edge is snapped to the index of the closest value in freq; edges that
      snap to the same index are merged.

    Returns (freq_bucket, idxs): idxs are the sorted edge indices, freq_bucket
    holds one mean PSD value per edge -- the range from that edge up to (not
    including) the next edge, and for the last edge everything to the end of PSD.
    """
    nearest = set()
    for edge in buckets:
        nearest.add(np.abs(freq - edge).argmin())  # index of the closest frequency
    idxs = sorted(nearest)

    # Mean PSD between each pair of neighbouring edges ...
    freq_bucket = [PSD[lo:hi].mean() for lo, hi in zip(idxs, idxs[1:])]
    # ... plus the open-ended range that starts at the last edge.
    freq_bucket.append(PSD[idxs[-1]:].mean())
    return freq_bucket, idxs
def get_minmax_bucket_freq(audio_data, buckets, rate):
    """Return the per-bucket minimum and maximum power over all chunks.

    Args:
        audio_data: Iterable of audio chunks (one sequence of samples each).
        buckets: Bucket edge frequencies, passed to get_audio_freqs_in_buckets.
        rate: Sample rate used to convert FFT bins to frequencies.

    Returns:
        Tuple ``(minimums, maximums)``; each is a 1-D array with one entry per
        bucket, also when ``audio_data`` holds a single chunk.

    Raises:
        ValueError: If ``audio_data`` contains no chunks.
    """
    rows = [get_audio_freqs_in_buckets(chunk, buckets, rate) for chunk in audio_data]
    if not rows:
        raise ValueError('audio_data must contain at least one chunk')
    # Building the array once keeps it 2-D (chunks x buckets) even for a single
    # chunk, so axis=0 always reduces over chunks rather than over buckets.
    all_buckets = np.array(rows)
    return np.min(all_buckets, axis=0), np.max(all_buckets, axis=0)
def get_audio_freqs_in_buckets(audio_data_chunk, buckets, rate):
    """
    Transform one chunk of audio amplitudes to the frequency domain and average
    its power per frequency bucket.

    - audio_data_chunk: sequence of samples for the current CHUNK
    - buckets: list of edge frequencies handed to fft_to_buckets,
      example - [100, 1000, 5000] Hz
    - rate: sample rate, used to map FFT bins to frequencies

    Returns the list of bucket values produced by fft_to_buckets.
    """
    sample_count = len(audio_data_chunk)
    spectrum = np.fft.fft(audio_data_chunk, sample_count)
    # Power Spectral Density
    power = np.abs(spectrum * np.conj(spectrum) / sample_count)
    # Frequency belonging to each FFT bin
    bin_freqs = (rate / sample_count) * np.arange(sample_count)
    return fft_to_buckets(bin_freqs, power, buckets)[0]
def get_split_times(data, rate, thresholds, buckets, buckets_min, buckets_max, min_reset=125, chunk=1024, start_time=0, stop_time=0):
    '''
    Find the times at which any frequency bucket rises above its threshold.

    data: sequence of audio chunks
    rate: sample rate of the audio
    thresholds: one limit per bucket, compared against the bucket value scaled
        with buckets_min / buckets_max; entries that are not > 0 are ignored
    buckets: bucket edge frequencies passed to get_audio_freqs_in_buckets
    buckets_min, buckets_max: per-bucket minimum / maximum used for scaling
    min_reset [ms]: length of time (in ms) to wait before a new split can occur
    chunk: number of samples per chunk, used to turn a chunk index into a time
    start_time[s]: start audio data here
    stop_time [s]: stop audio data here (0 means the end of data)

    Returns a list of times in seconds. It always starts with start_time and
    ends with stop_time; detected split times lie in between.
    '''
    n_chunks = len(data)
    chunk_seconds = chunk / rate
    if stop_time == 0:
        stop_time = n_chunks * chunk_seconds
    times = [start_time]

    # Nothing to analyse: only the two boundary times are reported.
    if n_chunks == 0:
        times.append(stop_time)
        return times

    # Number of chunks to jump ahead after a split so that two splits are at
    # least min_reset apart.
    min_reset_frame_cnt = int(min_reset / (chunk_seconds * 1000)) + 2
    floor = np.asarray(buckets_min, dtype=float)
    span = np.asarray(buckets_max, dtype=float) - floor

    i = 0
    while i < n_chunks:
        now = i * chunk / rate
        if now > stop_time:
            # Chunk times only grow, so nothing further can fall in the window.
            break
        step = 1
        if now >= start_time:
            # Only chunks inside the window need the (costly) FFT.
            levels = np.asarray(get_audio_freqs_in_buckets(data[i], buckets, rate), dtype=float)
            # Scale buckets. A bucket whose min equals its max carries no
            # information; it is scaled to 0 instead of dividing by zero.
            numer = levels - floor
            denom = np.broadcast_to(span, numer.shape)
            scaled = np.zeros_like(numer)
            np.divide(numer, denom, out=scaled, where=denom != 0)
            # If freq range is above threshold limit. Ignore thresholds set to 0
            above = any(
                thresholds[k] > 0 and level > thresholds[k]
                for k, level in enumerate(scaled)
            )
            if above:
                times.append(now)
                step = min_reset_frame_cnt
        i += step

    # Add final time
    times.append(stop_time)
    return times
def moving_average(x, width=10):
    """Return the unweighted sliding-window mean of ``x``.

    Args:
        x: 1-D sequence of numbers.
        width: Number of consecutive samples averaged per output value.

    Returns:
        Array of window means computed with ``np.convolve`` in ``'valid'``
        mode, i.e. only positions where the window fully overlaps ``x``.
    """
    box = np.ones(width)
    window_sums = np.convolve(x, box, 'valid')
    return window_sums / width
def is_increasing(data):
    """Report whether the smoothed signal has a positive mean second difference.

    ``data`` is first smoothed with a 400-sample moving average; the result is
    true when the mean of the second discrete difference of that smoothed
    signal is greater than zero.

    NOTE(review): a second difference measures curvature rather than slope, so
    this looks like "rising faster and faster" rather than plain "increasing"
    -- confirm that this is the intent.
    """
    smoothed = moving_average(data, width=400)
    second_diff = np.diff(smoothed, n=2)
    return np.mean(second_diff) > 0
def separate_audio_tracks(audio_file, save_dir=None, use_gpu=True):
    """Split an audio file into drums, bass, vocals and other stems with Spleeter.

    Separation is skipped when the output folder for this file already exists
    and is not empty.

    Args:
        audio_file: Path of the audio file to separate.
        save_dir: Folder that receives one sub-folder per separated file.
            Defaults to ``Media/Audio/Separated``.
        use_gpu: Selects the value written to ``CUDA_VISIBLE_DEVICES``
            ("1" when true, "-1" when false).

    Returns:
        Path of the folder holding the separated stems:
        ``save_dir/<audio file name without extension>``.

    Side effects:
        Overwrites the ``CUDA_VISIBLE_DEVICES`` environment variable and prints
        progress messages.
    """
    if not save_dir:
        save_dir = os.path.join('Media', 'Audio', 'Separated')

    # NOTE(review): "1" exposes only the CUDA device with index 1; on a machine
    # with a single GPU (index 0) this hides it -- confirm the intended device.
    os.environ["CUDA_VISIBLE_DEVICES"] = "1" if use_gpu else "-1"

    audio_filename = os.path.basename(audio_file)
    print('audio_filename', audio_filename)
    # Strip only the extension, so names containing dots ("my.song.wav") map to
    # the folder Spleeter writes ("my.song") rather than being cut at the first dot.
    track_name = os.path.splitext(audio_filename)[0]
    separated_path = os.path.join(save_dir, track_name)
    print('separated_path', separated_path)

    if os.path.isdir(separated_path) and os.listdir(separated_path):
        print('Separated tracks found. Skipping audio track separation.')
    else:
        print('Separating music in to drums, bass, vocals, & other and saving in save_dir.')
        # Handle windows lack of support for multiprocess module.
        # NOTE(review): the substring test also matches 'darwin' (macOS), so
        # multiprocessing is disabled there too; left unchanged on purpose.
        mult = 'win' not in sys.platform
        separator = Separator('spleeter:4stems', multiprocess=mult)  # Split to: Bass, Drums, Vocals, & Other
        separator.separate_to_file(audio_file, save_dir, synchronous=True)
    return separated_path