-
Notifications
You must be signed in to change notification settings - Fork 3
/
predict_pitch.py
128 lines (111 loc) · 5.98 KB
/
predict_pitch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# Module setup for the pitch-prediction script.
import sys
import numpy as np
# The two sys.path edits below run BEFORE the project-local imports so that
# `cf`, `diagnostics` and `base_model` can be resolved from these directories
# (presumably they live under ./thickstun/lib/ or ./lib/ -- confirm).
# Both paths are relative to the current working directory.
sys.path.append('./thickstun/lib/')
sys.path.insert(0,'lib/')
import cf
import diagnostics
import base_model
import tensorflow as tf
import os,mmap
import librosa
# Expose only CUDA device 0 to this process.
# NOTE(review): this is set after `import tensorflow`; it only takes effect if
# TensorFlow has not initialised its GPU devices yet -- confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
def create_filters(d, k, start_freq=50., end_freq=6000., sample_rate=44100.):
    """Build a bank of windowed sine/cosine filters with geometrically spaced frequencies.

    Filter ``ind`` (0 <= ind < k) oscillates at
    ``start_freq * (end_freq / start_freq) ** (ind / k)``, so the bank starts at
    ``start_freq`` and approaches, but never reaches, ``end_freq``.  Every
    filter is multiplied by the raised-cosine window ``1 - cos(x)`` over the
    ``d`` taps.

    Args:
        d: Number of taps (samples) in each filter.
        k: Number of filters in the bank.
        start_freq: Frequency of the first filter, in the same units as
            ``sample_rate``.
        end_freq: Upper end of the geometric frequency progression.
        sample_rate: Sampling rate used to convert a frequency into a number
            of cycles across the ``d`` taps.

    Returns:
        A tuple ``(wsin, wcos)`` of ``float32`` arrays, each of shape
        ``(1, d, 1, k)``.
    """
    x = np.linspace(0, 2*np.pi, d, endpoint=False)
    wsin = np.empty((1, d, 1, k), dtype=np.float32)
    wcos = np.empty((1, d, 1, k), dtype=np.float32)

    # Number of full cycles the lowest-frequency filter completes over d taps.
    num_cycles = start_freq*d/sample_rate
    # Per-filter increment of log-frequency.
    scaling_ind = np.log(end_freq/start_freq)/k
    # Window that is zero at the first tap and peaks (value 2) mid-filter.
    window_mask = 1.0 - 1.0*np.cos(x)

    for ind in range(k):
        # The phase is shared by the sine and cosine filter of this index.
        phase = np.exp(ind*scaling_ind)*num_cycles*x
        wsin[0, :, 0, ind] = window_mask*np.sin(phase)
        wcos[0, :, 0, ind] = window_mask*np.cos(phase)

    return wsin, wcos
class Spectrograms(base_model.Model):
    """Three-layer convolutional model on top of a fixed sine/cosine filter bank.

    Everything read from ``self`` but not assigned here (``window``, ``stride``,
    ``m``, ``batch_size``, ``xq``, ``yq``, ``xd``, ``yd``, ``register_weights``)
    is expected to come from ``base_model.Model``, which is not visible in this
    file.
    """

    def __init__(self, *args, **kwargs):
        # Pure pass-through; all construction logic lives in the base class.
        super(Spectrograms, self).__init__(*args, **kwargs)

    def define_graph(self):
        """Create the weights and build the 'queued' and 'direct' graphs.

        Sets ``self.loss`` (queued inputs ``self.xq``/``self.yq``, raw weights)
        and ``self.zx``, ``self.z2``, ``self.z3``, ``self.y_direct``,
        ``self.loss_direct`` (direct inputs ``self.xd``/``self.yd``, using the
        values returned by ``register_weights``).
        """
        super(Spectrograms, self).define_graph()

        # lvl1 convolutions are shared between regions
        self.k = 512        # lvl1 nodes
        self.d = 4096       # lvl1 receptive field

        d2_x = 1            # lvl2 input dims_x
        d2_y = 128          # lvl2 input dims_y
        k2 = 128            # num lvl2 filters
        stride_y = 2        # lvl2 stride

        d3_x = 25           # lvl3 input dims_x
        d3_y = 1            # lvl3 input dims_y (fully connected)
        k3 = 4096           # num lvl3 filters

        # Output extents of each VALID convolution.  Floor division is
        # required: under Python 3 a plain `/` yields a float, and when the
        # sizes do not divide evenly the flattened size computed below would
        # no longer match the actual convolution output.
        num_regions = 1 + (self.window - self.d)//self.stride
        num_regions2_x = 1 + (num_regions - d2_x)//1
        num_regions2_y = 1 + (self.k - d2_y)//stride_y
        num_regions3_x = 1 + (num_regions2_x - d3_x)//1
        num_regions3_y = 1 + (num_regions2_y - d3_y)//1

        # Number of features per example after flattening the lvl3 output.
        flat_dim = int(num_regions3_x*num_regions3_y*k3)

        # Fixed (non-trainable) lvl1 filter bank.
        wsin, wcos = create_filters(self.d, self.k)

        print('---- Weights ----')
        wscale = .0001
        with tf.compat.v1.variable_scope('parameters'):
            w = tf.Variable(wscale*tf.random.normal([d2_x, d2_y, 1, k2], seed=999))
            print('w', w)
            # NOTE(review): `average=.9998` looks like a moving-average decay
            # applied by the base class -- confirm in base_model.
            wavg = self.register_weights(w, 'w', average=.9998)

            w2 = tf.Variable(wscale*tf.random.normal([d3_x, d3_y, k2, k3], seed=999))
            print('w2', w2)
            w2avg = self.register_weights(w2, 'w2', average=.9998)

            beta = tf.Variable(wscale*tf.random.normal([flat_dim, self.m], seed=999))
            print('beta', beta)
            betaavg = self.register_weights(beta, 'beta', average=.9998)

        print('---- Layers ----')
        with tf.compat.v1.variable_scope('queued_model'):
            # Sum of squared sine and cosine responses of the filter bank.
            zx = tf.square(tf.nn.conv2d(self.xq, wsin, strides=[1, 1, self.stride, 1], padding='VALID')) \
               + tf.square(tf.nn.conv2d(self.xq, wcos, strides=[1, 1, self.stride, 1], padding='VALID'))
            print('zx', zx)
            # The additive constant 10e-15 (== 1e-14) keeps log() finite at zero.
            z2 = tf.nn.relu(tf.nn.conv2d(tf.math.log(zx + 10e-15), w, strides=[1, 1, 1, stride_y], padding='VALID', data_format='NCHW'))
            print('z2', z2)
            z3 = tf.nn.relu(tf.nn.conv2d(z2, w2, strides=[1, 1, 1, 1], padding='VALID', data_format='NCHW'))
            print('z3', z3)
            y = tf.matmul(tf.reshape(z3, [self.batch_size, flat_dim]), beta)
            print('y', y)
            self.loss = tf.reduce_mean(tf.nn.l2_loss(y - tf.reshape(self.yq, [self.batch_size, self.m])))

        with tf.compat.v1.variable_scope('direct_model'):
            # Same architecture as above, fed from self.xd and using the
            # registered (wavg / w2avg / betaavg) weights; the batch size is
            # taken dynamically from the input.
            self.zx = tf.square(tf.nn.conv2d(self.xd, wsin, strides=[1, 1, self.stride, 1], padding='VALID')) \
                    + tf.square(tf.nn.conv2d(self.xd, wcos, strides=[1, 1, self.stride, 1], padding='VALID'))
            self.z2 = tf.nn.relu(tf.nn.conv2d(tf.math.log(self.zx + 10e-15), wavg, strides=[1, 1, 1, stride_y], padding='VALID', data_format='NCHW'))
            self.z3 = tf.nn.relu(tf.nn.conv2d(self.z2, w2avg, strides=[1, 1, 1, 1], padding='VALID', data_format='NCHW'))
            self.y_direct = tf.matmul(tf.reshape(self.z3, [tf.shape(self.xd)[0], flat_dim]), betaavg)
            self.loss_direct = tf.reduce_mean(tf.nn.l2_loss(self.y_direct - self.yd))
def predict(path):
    """Run the model over every record in ./thickstun/data/records/ and save predictions.

    For each entry ``f`` of the records directory, the prediction is written to
    ``path + f`` unless that file already exists.  ``path`` is concatenated
    verbatim with the file name, so it must end with a path separator.

    Args:
        path: Output directory prefix (including the trailing separator).
    """
    labels = None
    # The former `try: model.stop() / except NameError` guard was removed:
    # `model` is a local of this function, so the call could never succeed.
    model = Spectrograms(labels, checkpoint_path='./thickstun/convnet_experimental2_morelvl3/', outputs=1, window=16384, mmap=True,
                         normalize=True, extended_test_set=False, use_mirex=True, init=False, pitch_transforms=5, jitter=.1,
                         restrict=False, isTest=False)
    print('finish model loading...')

    for f in os.listdir('./thickstun/data/records/'):
        if os.path.isfile(path + f):
            print('exist')
            continue
        try:
            # The record id is the file name without its last four characters
            # (assumes a three-letter extension such as ".npy").
            _, Yhat, _, _, _ = model.sample_records(int(f[:-4]), 10000, fixed_stride=512)
            np.save(path + f, Yhat.T)
        except Exception as e:
            # Deliberate best-effort: a failing record is reported and skipped
            # so the remaining records are still processed.
            print(e)
        else:
            # Only report completion once the prediction has actually been saved.
            print(f + ' complete!')
def main(name):
    """Predict pitch for one audio file in ./mp3/ and save it under ./thickstun/pitch/.

    The audio is loaded at 44100 Hz, dumped to ./thickstun/tmp/test.npy,
    memory-mapped read-only and handed to ``model.sample_records``.  The
    prediction is saved as ``./thickstun/pitch/<name minus last 4 chars>.npy``.

    Args:
        name: File name (not a path) of the audio file inside ./mp3/.
    """
    labels = None
    # The former `try: model.stop() / except NameError` guard was removed:
    # `model` is a local of this function, so the call could never succeed.
    model = Spectrograms(labels, checkpoint_path='./thickstun/convnet_experimental2_morelvl3/', outputs=1, window=16384, mmap=True,
                         normalize=True, extended_test_set=False, use_mirex=True, init=False, pitch_transforms=5, jitter=.1,
                         restrict=False)
    print('finish model loading...')
    print(name)

    # `sr` is passed by keyword: recent librosa releases reject it positionally.
    data, _sr = librosa.load('./mp3/' + name, sr=44100)
    np.save('./thickstun/tmp/test.npy', data)

    fd = os.open('./thickstun/tmp/test.npy', os.O_RDONLY)
    try:
        # NOTE(review): the mapping covers the whole .npy file, including its
        # header bytes -- confirm sample_records accounts for that.
        buff = mmap.mmap(fd, 0, mmap.MAP_SHARED, mmap.PROT_READ)
    finally:
        # mmap keeps its own duplicate of the descriptor, so the original can
        # be closed right away instead of leaking.
        os.close(fd)

    # The mapping itself is left to be released by garbage collection because
    # sample_records may keep views into it.
    _, Yhat, _, _, _ = model.sample_records(buff, 10000, fixed_stride=512)
    np.save('./thickstun/pitch/' + name[:-4] + '.npy', Yhat)
if __name__ == "__main__":
    # The script takes the audio file name (looked up inside ./mp3/) as its
    # first command-line argument; fail with a usage hint instead of an
    # IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit('usage: python predict_pitch.py <audio-file-name>')
    main(sys.argv[1])