# neural_net.py
import numpy as np
class Layer:
    """Base class: every layer exposes forward() and backward()."""
    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        raise NotImplementedError

    def backward(self, delta, learning_rate):
        raise NotImplementedError
class ReLU(Layer):
    def __init__(self):
        self.a = None  # cached activation from the forward pass
        self.d = None  # gradient passed back to the previous layer

    def forward(self, x):
        self.a = np.maximum(x, 0)
        return self.a

    def backward(self, delta, learning_rate=0.001):
        # pass the gradient through only where the input was positive
        self.d = delta * (self.a > 0).astype(int)
        return self.d
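
# Added note (not in the original file): the derivative of ReLU at exactly
# x == 0 is taken to be 0 here, the usual subgradient convention; the mask
# (self.a > 0) encodes that choice.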
class Linear(Layer):
    def __init__(self, n_in, n_out):
        self.n_in = n_in
        self.n_out = n_out
        # standard-normal weight init; biases start at zero
        self.W = np.random.normal(0, 1, (n_in, n_out))
        self.b = np.zeros((1, n_out))

    def forward(self, x):
        self.x = x  # cache the input for backward()
        self.a = np.matmul(x, self.W) + self.b
        return self.a

    def backward(self, delta, learning_rate):
        # gradient w.r.t. the input, computed before W is updated
        self.d = np.matmul(delta, np.transpose(self.W))
        # SGD update: dL/dW = x^T delta, dL/db = column sums of delta
        self.W = self.W - learning_rate * np.matmul(np.transpose(self.x), delta)
        self.b = self.b - learning_rate * delta.sum(axis=0)
        return self.d
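
# A finite-difference gradient check for Linear.backward() (added sketch,
# not part of the original file; the helper name, shapes, and eps are
# illustrative assumptions). Passing learning_rate=0 makes the parameter
# update a no-op, so backward() can be probed without mutating W or b.
def _check_linear_gradient(eps=1e-5):
    layer = Linear(3, 2)
    x = np.random.normal(0, 1, (4, 3))
    delta = np.random.normal(0, 1, (4, 2))
    layer.forward(x)
    analytic = layer.backward(delta, learning_rate=0.0)
    numeric = np.zeros_like(x)
    for i in range(x.shape[0]):
        for j in range(x.shape[1]):
            xp, xm = x.copy(), x.copy()
            xp[i, j] += eps
            xm[i, j] -= eps
            # central difference of the scalar loss L = sum(delta * output)
            numeric[i, j] = ((delta * layer.forward(xp)).sum()
                             - (delta * layer.forward(xm)).sum()) / (2 * eps)
    # for a linear map this should differ only by floating-point noise
    return np.abs(analytic - numeric).max()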
class Loss:
    def __call__(self, x, y):
        return self.forward(x, y)

    def forward(self, x, y):
        raise NotImplementedError

    def backward(self):
        raise NotImplementedError
class CrossEntropyLoss(Loss):
    def forward(self, x, y):
        # softmax with the row max subtracted first for numerical stability
        z = np.exp(x - x.max(axis=1, keepdims=True))
        self.sm = z / z.sum(axis=1, keepdims=True)
        guesses = np.argmax(self.sm, axis=1)
        acc = (guesses == y).sum()
        # cross-entropy is the negative log-probability of the true class
        cel = -np.log(np.choose(y, self.sm.T))
        self.y = y
        return cel.sum(), acc

    def backward(self):
        # gradient w.r.t. the logits: softmax minus the one-hot targets
        d = self.sm.copy()
        d[np.arange(d.shape[0]), self.y] -= 1
        self.d = d
        return d
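
# Added note (not in the original file): with p = softmax(x) and one-hot
# targets t, the summed cross-entropy is L = -sum(t * log(p)), and its
# gradient w.r.t. the logits is dL/dx = p - t, which is exactly what
# backward() returns (no 1/batch_size factor, matching cel.sum() above).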
class NN:
    def __init__(self, layers, criterion):
        self.layers = layers
        self.criterion = criterion

    def train(self, x, y, learning_rate):
        # forward pass through every layer, then one SGD step backwards
        a = x
        for layer in self.layers:
            a = layer.forward(a)
        loss, accuracy = self.criterion.forward(a, y)
        d = self.criterion.backward()
        for layer in self.layers[::-1]:
            d = layer.backward(d, learning_rate)
        return loss, accuracy

    def val(self, x, y):
        # forward pass only: no parameter updates during validation
        a = x
        for layer in self.layers:
            a = layer.forward(a)
        loss, accuracy = self.criterion.forward(a, y)
        return loss, accuracy
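
# A minimal usage sketch (added; the helper name, shapes, and random data
# are illustrative assumptions): one training step on a random batch, with
# the summed loss and correct-count converted to per-sample averages.
def _toy_example():
    net = NN([Linear(4, 8), ReLU(), Linear(8, 3)], CrossEntropyLoss())
    xb = np.random.normal(0, 1, (5, 4))  # batch of 5 four-dimensional inputs
    yb = np.random.randint(0, 3, 5)      # integer class labels in [0, 3)
    loss, correct = net.train(xb, yb, learning_rate=1e-3)
    return loss / 5, correct / 5         # per-sample loss and accuracy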
if __name__ == '__main__':
    mnist_data = np.load("mnist.npz")
    train_x, train_y = mnist_data["train_x"], mnist_data["train_y"]
    val_x, val_y = mnist_data["val_x"], mnist_data["val_y"]
    train_num, val_num = train_x.shape[0], val_x.shape[0]

    n_epochs = 100
    batch_size = 64
    learning_rate = 1e-3

    layers = [
        Linear(784, 100),
        ReLU(),
        Linear(100, 10)
    ]
    criterion = CrossEntropyLoss()
    mnist_nn = NN(layers, criterion)

    # statistics collected per epoch
    train_loss_list, train_acc_list = [], []
    val_loss_list, val_acc_list = [], []

    # begin training and validation
    for e in range(n_epochs):
        train_loss, train_acc = 0, 0
        val_loss, val_acc = 0, 0
        # shuffle the training set each epoch so batches differ between epochs
        idxs = np.arange(train_num)
        np.random.shuffle(idxs)
        train_x, train_y = train_x[idxs], train_y[idxs]
        # training
        for b in range(0, train_num, batch_size):
            range_ = range(b, min(b + batch_size, train_num))
            loss, accuracy = mnist_nn.train(train_x[range_], train_y[range_], learning_rate)
            train_loss += loss
            train_acc += accuracy
        # validation
        for b in range(0, val_num, batch_size):
            range_ = range(b, min(b + batch_size, val_num))
            loss, accuracy = mnist_nn.val(val_x[range_], val_y[range_])
            val_loss += loss
            val_acc += accuracy
        # per-sample averages over the whole epoch
        train_loss /= train_num
        train_acc /= train_num
        val_loss /= val_num
        val_acc /= val_num
        train_loss_list.append(train_loss)
        train_acc_list.append(train_acc)
        val_loss_list.append(val_loss)
        val_acc_list.append(val_acc)
        # summary of the epoch
        print("epoch: {}, train acc: {:.2f}%, train loss: {:.3f}, val acc: {:.2f}%, val loss: {:.3f}"
              .format(e + 1, train_acc * 100, train_loss, val_acc * 100, val_loss))