# learn.py
# Forked from dhruvesh13/Audio-Genre-Classification.
import sklearn
from sklearn import linear_model
from sklearn.neighbors import KNeighborsClassifier
from sklearn.cross_validation import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import scipy
import os
import sys
import glob
import numpy as np
from utils1 import GENRE_DIR, GENRE_LIST
from sklearn.externals import joblib
from random import shuffle
"""reads FFT-files and prepares X_train and y_train.
genre_list must consist of names of folders/genres consisting of the required FFT-files
base_dir must contain genre_list of directories
"""
def read_fft(genre_list, base_dir):
    """Load saved FFT feature arrays together with integer genre labels.

    For every genre in ``genre_list`` the files matching
    ``<base_dir>/<genre>/*.fft.npy`` are loaded with ``np.load``. The label
    of a sample is the index of its genre within ``genre_list``.

    Args:
        genre_list: Names of the genre sub-directories inside ``base_dir``.
        base_dir: Directory that contains one sub-directory per genre.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays, where ``X`` is built from the
        loaded feature arrays and ``y`` holds the matching labels. Both are
        empty when no file matches.
    """
    X = []
    y = []
    for label, genre in enumerate(genre_list):
        # Glob pattern that matches the FFT files of this genre.
        pattern = os.path.join(base_dir, genre, "*.fft.npy")
        # glob.glob returns matches in arbitrary order; sorting keeps the
        # sample order reproducible between runs and machines.
        for path in sorted(glob.glob(pattern)):
            X.append(np.load(path))
            y.append(label)
    # NOTE(review): assumes every file holds an array of the same shape so
    # the list converts to a regular 2-D array; this is not checked here.
    return np.array(X), np.array(y)
"""reads MFCC-files and prepares X_train and y_train.
genre_list must consist of names of folders/genres consisting of the required MFCC-files
base_dir must contain genre_list of directories
"""
def read_ceps(genre_list, base_dir):
    """Load saved MFCC arrays and reduce each one to a single mean vector.

    For every genre in ``genre_list`` the files matching
    ``<base_dir>/<genre>/*.ceps.npy`` are loaded with ``np.load``. Each
    array is averaged along axis 0 over its middle 80% (the first and last
    tenth of the entries are dropped). The label of a sample is the index
    of its genre within ``genre_list``.

    The shape of the feature array and the number of labels are printed
    before returning.

    Args:
        genre_list: Names of the genre sub-directories inside ``base_dir``.
        base_dir: Directory that contains one sub-directory per genre.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays: one averaged feature vector per
        file and the matching labels.
    """
    X = []
    y = []
    for label, genre in enumerate(genre_list):
        pattern = os.path.join(base_dir, genre, "*.ceps.npy")
        # glob.glob returns matches in arbitrary order; sorting keeps the
        # sample order reproducible between runs and machines.
        for fn in sorted(glob.glob(pattern)):
            # NOTE(review): presumably shaped (frames, coefficients) - confirm
            # against the code that writes the .ceps.npy files.
            ceps = np.load(fn)
            num_ceps = len(ceps)
            start = num_ceps // 10
            stop = num_ceps * 9 // 10
            # A one-entry clip makes the trimmed window empty, and the mean of
            # an empty slice is NaN; fall back to the whole clip instead.
            window = ceps[start:stop] if stop > start else ceps
            X.append(np.mean(window, axis=0))
            y.append(label)

    # Convert once and reuse for both the diagnostic print and the result.
    features = np.array(X)
    print(features.shape)
    print(len(y))
    return features, np.array(y)
def _fit_and_report(classifier, name, model_path, X_train, y_train, X_test, y_test):
    """Fit one classifier, print its accuracy and confusion matrix, save it, and return the matrix."""
    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    cm = confusion_matrix(y_test, predictions)
    print(name + " accuracy = " + str(accuracy))
    print(name + "_cm:")
    print(cm)
    joblib.dump(classifier, model_path)
    return cm


def learn_and_classify(X_train, y_train, X_test, y_test, genre_list):
    """Train and evaluate a logistic-regression and a k-NN classifier.

    Both classifiers are fitted on the training split with their default
    settings and evaluated on the test split. For each one the accuracy and
    confusion matrix are printed and the fitted model is written to
    ``saved_models/`` with ``joblib.dump``. Finally both confusion matrices
    are shown with ``plot_confusion_matrix``.

    Args:
        X_train: Training feature vectors; must contain at least one sample.
        y_train: Labels for ``X_train``.
        X_test: Test feature vectors.
        y_test: Labels for ``X_test``.
        genre_list: Genre names used as tick labels on the plots.

    Returns:
        None.
    """
    print(len(X_train))
    print(len(X_train[0]))

    # joblib.dump does not create missing directories; make sure the target
    # exists so a finished training run is not lost to a missing folder.
    if not os.path.isdir('saved_models'):
        os.makedirs('saved_models')

    # Logistic Regression classifier, taken from the public
    # sklearn.linear_model namespace rather than the private `logistic`
    # submodule, which newer scikit-learn releases no longer expose.
    # Change the pickle file name when training on other features
    # (e.g. model_mfcc_fft).
    logistic_cm = _fit_and_report(
        linear_model.LogisticRegression(), "logistic",
        'saved_models/model_mfcc_log.pkl',
        X_train, y_train, X_test, y_test)

    # K-Nearest neighbour classifier
    knn_cm = _fit_and_report(
        KNeighborsClassifier(), "knn",
        'saved_models/model_mfcc_knn.pkl',
        X_train, y_train, X_test, y_test)

    # NOTE(review): the second title mentions FFT regardless of which features
    # were passed in, and neither title names the classifier; kept unchanged.
    plot_confusion_matrix(logistic_cm, "Confusion matrix", genre_list)
    plot_confusion_matrix(knn_cm, "Confusion matrix for FFT classification", genre_list)
def plot_confusion_matrix(cm, title, genre_list, cmap=plt.cm.Blues):
    """Show a confusion matrix as a colour-mapped image.

    Args:
        cm: Confusion matrix to draw.
        title: Title placed above the plot.
        genre_list: Genre names used as tick labels on both axes.
        cmap: Matplotlib colour map used for the image.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # One tick per genre, shared by both axes.
    positions = np.arange(len(genre_list))

    plt.imshow(cm, cmap=cmap, interpolation='nearest')
    plt.title(title)
    plt.colorbar()
    plt.xticks(positions, genre_list, rotation=45)
    plt.yticks(positions, genre_list)
    plt.tight_layout()
    plt.xlabel('Predicted label')
    plt.ylabel('True label')
    plt.show()
def main():
    """Load MFCC features for a fixed genre list, then train and evaluate.

    The data is read from ``GENRE_DIR``, split 80/20 into training and test
    sets, and handed to ``learn_and_classify``.
    """
    # Genres to classify. Each name must be a sub-directory of GENRE_DIR
    # holding that genre's feature files. Change the list if needed.
    # To classify only classical and jazz:
    # genre_list = ["classical", "jazz"]
    genre_list = ["blues", "classical", "country", "disco", "metal"]

    # FFT variant (disabled):
    # X, y = read_fft(genre_list, GENRE_DIR)
    # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.20)
    # print('\n******USING FFT******')
    # learn_and_classify(X_train, y_train, X_test, y_test, genre_list)
    # print('*********************\n')

    # MFCC variant
    features, labels = read_ceps(genre_list, GENRE_DIR)
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=.20)
    print("new1", X_train.shape)

    print('******USING MFCC******')
    learn_and_classify(X_train, y_train, X_test, y_test, genre_list)
    print('*********************')
# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()