quantizeWeights.py

from sklearn.cluster import KMeans
import numpy as np
import torch
from tqdm import tqdm
import copy


def quantize1d_kmeans(x, num_bits=8):
    """Non-uniform quantization of a numpy array via k-means clustering."""
    if x.size < 2**num_bits:
        # Fewer values than clusters: quantization would be lossless anyway.
        return x
    orig_shape = x.shape
    x = np.expand_dims(x.flatten(), -1)
    # init = np.expand_dims(np.linspace(x.min(), x.max(), 2**num_bits), -1)
    # Note: the n_jobs argument was removed from sklearn's KMeans in 1.0.
    kmeans = KMeans(n_clusters=2**num_bits, random_state=0)
    x_kmeans = kmeans.fit_predict(x)
    # Replace every value with the centroid of its cluster.
    q_kmeans = kmeans.cluster_centers_[x_kmeans]
    return q_kmeans.reshape(orig_shape)
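

def _demo_quantize1d():
    # Hedged sanity check, not in the original file: quantizing to num_bits
    # bits should preserve the shape and leave at most 2**num_bits distinct
    # values. The array size and bit width here are arbitrary.
    x = np.random.randn(64, 3, 3, 3).astype(np.float32)
    q = quantize1d_kmeans(x, num_bits=4)
    assert q.shape == x.shape
    assert len(np.unique(q)) <= 2**4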


def leave_8_bit(name, param):
    # First layer (3 input channels) or last layer (1000-way ImageNet
    # fc/classifier): keep these at 8 bits.
    return ('weight' in name and param.dim() > 1 and param.shape[1] == 3) or \
           ('fc' in name and param.shape[0] == 1000) or \
           ('classifier' in name and param.shape[0] == 1000)
    # (name == 'Conv2d_2a_3x3.conv.weight')  # WA for inception_v3


def bn_layers(name, param):
    # BatchNorm (and other 1-D) parameters, excluding the 1000-way
    # classifier bias: these are kept at 16 bits.
    return (param.dim() == 1) and (param.shape[0] != 1000)
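

def _demo_param_split(model):
    # Hedged helper, not in the original file: print how each parameter would
    # be routed by the two predicates above. Names and shapes are whatever
    # model.named_parameters() reports for the given model.
    for name, param in model.named_parameters():
        if leave_8_bit(name, param):
            group = '8-bit (first/last layer)'
        elif bn_layers(name, param):
            group = '16-bit (BN / other 1-D)'
        else:
            group = 'num_bits (k-means)'
        print('{:50s} {} -> {}'.format(name, tuple(param.shape), group))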


def quantize_model_parameters(model, num_bits):
    # Quantize the parameters of the model in place with k-means:
    # regular weights at num_bits, first/last layer weights at 8 bits,
    # and BN / 1-D parameters at 16 bits.
    named_params = [p for p in model.named_parameters()
                    if not bn_layers(*p) and not leave_8_bit(*p)]
    named_params_8 = [p for p in model.named_parameters() if leave_8_bit(*p)]
    named_params_16 = [p for p in model.named_parameters() if bn_layers(*p)]
    for _, param in tqdm(named_params):
        param.data = torch.tensor(quantize1d_kmeans(param.detach().numpy(), num_bits=num_bits))
    for _, param in tqdm(named_params_8):
        param.data = torch.tensor(quantize1d_kmeans(param.detach().numpy(), num_bits=8))
    for _, param in tqdm(named_params_16):
        param.data = torch.tensor(quantize1d_kmeans(param.detach().numpy(), num_bits=16))


def quantizeWeights(model, num_bits, log):
    # Quantize a copy of the model with non-uniform k-means quantization.
    model_qkmeans = copy.deepcopy(model)
    quantize_model_parameters(model_qkmeans, num_bits=num_bits)
    # Per-channel bias correction: k-means shifts the mean of each output
    # channel, so subtract that shift to restore the original channel means.
    p_km = list(model_qkmeans.named_parameters())
    p_orig = list(model.named_parameters())
    for i in tqdm(range(len(p_km))):
        if not bn_layers(p_km[i][0], p_km[i][1]) and not leave_8_bit(p_km[i][0], p_km[i][1]):
            w_km = p_km[i][1]
            w_orig = p_orig[i][1]
            mean_delta = w_km.view(w_km.shape[0], -1).mean(dim=-1) - \
                         w_orig.view(w_orig.shape[0], -1).mean(dim=-1)
            p_km[i][1].data = (w_km.view(w_km.shape[0], -1) -
                               mean_delta.view(mean_delta.shape[0], 1)).view(w_orig.shape)
    log.info('Weights quantized to {} bits'.format(num_bits))
    return model_qkmeans
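

if __name__ == '__main__':
    # Hedged usage sketch, not part of the original file: quantize a
    # torchvision ResNet-18 to 4-bit weights. The model choice, bit width,
    # and logging setup are illustrative assumptions.
    import logging
    from torchvision import models

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('quantizeWeights')

    model = models.resnet18(pretrained=True)
    model_q = quantizeWeights(model, num_bits=4, log=logger)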