forked from hisfog/SfMNeXt-Impl
-
Notifications
You must be signed in to change notification settings - Fork 0
/
CKA_visualize.py
286 lines (230 loc) · 9.58 KB
/
CKA_visualize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
from __future__ import absolute_import, division, print_function
from visualizer import get_local
get_local.activate()
import torch
import torchvision.transforms as T
import json
from PIL import Image, ImageDraw
import numpy as np
import matplotlib.pyplot as plt
import networks
import os
import sys
import cv2
from options import MonodepthOptions
def grid_show(to_shows, cols):
rows = (len(to_shows)-1) // cols + 1
it = iter(to_shows)
fig, axs = plt.subplots(rows, cols, figsize=(rows*8.5, cols*2))
for i in range(rows):
for j in range(cols):
try:
image, title = next(it)
except StopIteration:
image = np.zeros_like(to_shows[0][0])
title = 'pad'
axs[i, j].imshow(image)
axs[i, j].set_title(title)
axs[i, j].set_yticks([])
axs[i, j].set_xticks([])
plt.show()
def visualize_head(att_map):
ax = plt.gca()
# Plot the heatmap
im = ax.imshow(att_map)
# Create colorbar
cbar = ax.figure.colorbar(im, ax=ax)
plt.show()
def visualize_heads(att_map, cols):
to_shows = []
att_map = att_map.squeeze()
for i in range(att_map.shape[0]):
to_shows.append((att_map[i], f'Head {i}'))
average_att_map = att_map.mean(axis=0)
to_shows.append((average_att_map, 'Head Average'))
grid_show(to_shows, cols=cols)
def gray2rgb(image):
return np.repeat(image[...,np.newaxis],3,2)
def cls_padding(image, mask, cls_weight, grid_size):
if not isinstance(grid_size, tuple):
grid_size = (grid_size, grid_size)
image = np.array(image)
H, W = image.shape[:2]
delta_H = int(H/grid_size[0])
delta_W = int(W/grid_size[1])
padding_w = delta_W
padding_h = H
padding = np.ones_like(image) * 255
padding = padding[:padding_h, :padding_w]
padded_image = np.hstack((padding,image))
padded_image = Image.fromarray(padded_image)
draw = ImageDraw.Draw(padded_image)
draw.text((int(delta_W/4),int(delta_H/4)),'CLS', fill=(0,0,0)) # PIL.Image.size = (W,H) not (H,W)
mask = mask / max(np.max(mask),cls_weight)
cls_weight = cls_weight / max(np.max(mask),cls_weight)
if len(padding.shape) == 3:
padding = padding[:,:,0]
padding[:,:] = np.min(mask)
mask_to_pad = np.ones((1,1)) * cls_weight
mask_to_pad = Image.fromarray(mask_to_pad)
mask_to_pad = mask_to_pad.resize((delta_W, delta_H))
mask_to_pad = np.array(mask_to_pad)
padding[:delta_H, :delta_W] = mask_to_pad
padded_mask = np.hstack((padding, mask))
padded_mask = padded_mask
meta_mask = np.zeros((padded_mask.shape[0], padded_mask.shape[1],4))
meta_mask[delta_H:,0: delta_W, :] = 1
return padded_image, padded_mask, meta_mask
def visualize_grid_to_grid_with_cls(att_map, grid_index, image, grid_size=14, alpha=0.6):
if not isinstance(grid_size, tuple):
grid_size = (grid_size, grid_size)
attention_map = att_map[grid_index]
cls_weight = attention_map[0]
mask = attention_map[1:].reshape(grid_size[0], grid_size[1])
mask = Image.fromarray(mask).resize((image.size))
padded_image ,padded_mask, meta_mask = cls_padding(image, mask, cls_weight, grid_size)
if grid_index != 0: # adjust grid_index since we pad our image
grid_index = grid_index + (grid_index-1) // grid_size[1]
grid_image = highlight_grid(padded_image, [grid_index], (grid_size[0], grid_size[1]+1))
fig, ax = plt.subplots(1, 2, figsize=(10,7))
fig.tight_layout()
ax[0].imshow(grid_image)
ax[0].axis('off')
ax[1].imshow(grid_image)
ax[1].imshow(padded_mask, alpha=alpha, cmap='rainbow')
ax[1].imshow(meta_mask)
ax[1].axis('off')
def visualize_grid_to_grid(att_map, grid_index, image, grid_size=14, alpha=0.6):
"""
grid_size=14是因为patch_size=16, 所以有224/16=14个patch
grid_index代表的是第几个grid,这个grid的attn_map是14x14大小的,代表的是对于每个patch的相似度,
然后resize成224x224大小即可得到对原图每个位置的相似度
然后作为透明度的掩码即可叠加到原图上。
"""
if not isinstance(grid_size, tuple):
grid_size = (grid_size, grid_size)
_, H,W = att_map.shape
with_cls_token = False
grid_image = highlight_grid(image, [grid_index], (8, 25))
# grid_image = highlight_grid(image, [grid_index], grid_size)
mask = att_map[grid_index].reshape(grid_size[0], grid_size[1])
mask = Image.fromarray(mask).resize((image.size))
fig, ax = plt.subplots(1, 1, figsize=(10,7))
fig.tight_layout()
# ax[0].imshow(grid_image)
# ax[0].axis('off')
ax.imshow(grid_image)
ax.imshow(mask/np.max(mask), alpha=alpha, cmap='rainbow')
ax.axis('off')
plt.savefig('attn_vis/attn_{}.png'.format(grid_index), bbox_inches='tight', pad_inches=0)
# plt.show()
def highlight_grid(image, grid_indexes, grid_size=14):
if not isinstance(grid_size, tuple):
grid_size = (grid_size, grid_size)
W, H = image.size
h = H / grid_size[0]
w = W / grid_size[1]
image = image.copy()
for grid_index in grid_indexes:
x, y = np.unravel_index(grid_index, (grid_size[0], grid_size[1]))
a= ImageDraw.ImageDraw(image)
a.rectangle([(y*w,x*h),(y*w+w,x*h+h)],fill =None,outline ='red',width =4)
return image
def hsic(x, y):
# Kx = np.expand_dims(x, 0) - np.expand_dims(x, 1)
# Kx = np.exp(- Kx**2) # 计算核矩阵
#
# Ky = np.expand_dims(y, 0) - np.expand_dims(y, 1)
# Ky = np.exp(- Ky**2) # 计算核矩阵
Kx = np.outer(x, x)
Ky = np.outer(y, y)
Kxy = np.dot(Kx, Ky)
n = Kxy.shape[0]
h = np.trace(Kxy) / n**2 + np.mean(Kx) * np.mean(Ky) - 2 * np.mean(Kxy) / n
return h * n**2 / (n - 1)**2
# 计算两个一维向量之前的CKA相似度
def CKA_impl(vec1, vec2):
return hsic(vec1, vec2) / np.sqrt(hsic(vec1, vec1) * hsic(vec2, vec2))
# 计算两个二维向量每个位置之间的相似度,并返回为一个二维的相似度矩阵(heatmap)
def CKA_vis(vec1, vec2):
# 如果两个数组的形状不同,抛出异常
if vec1.shape != vec2.shape:
raise ValueError("Cannot add arrays with different shapes.")
# 创建一个数组 c,用来存储结果
c = np.zeros((vec1.shape[0], vec1.shape[0]))
# 遍历每个位置,并计算相似度
for i, v1 in enumerate(vec1):
for j, v2 in enumerate(vec2):
c[i][j] = CKA_impl(v1, v2)
# 返回计算结果
return c
def CKA_main(opt):
# image = Image.open('./assets/noise.jpg')
# image = Image.open('./assets/RGB/0000000182.png')
# image = Image.open('./assets/con1.png')
image = Image.open('./assets/nwpu/1668829037004.jpg')
transforms = T.Compose([
T.Resize((320, 1024)),
T.ToTensor(),
])
input_tensor = transforms(image).unsqueeze(0)
input_tensor = input_tensor.cuda()
print(input_tensor.shape)
get_local.clear()
encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")
encoder_dict = torch.load(encoder_path)
encoder = networks.BaseEncoder.build(model_dim=opt.model_dim)
# depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)
depth_decoder = networks.Depth_Decoder_QueryTr(in_channels=opt.model_dim, patch_size=opt.patch_size, dim_out=opt.dim_out, embedding_dim=opt.model_dim,
query_nums=opt.query_nums, num_heads=4,
min_val=0.001, max_val=10.0)
model_dict = encoder.state_dict()
encoder.load_state_dict({k: v for k, v in encoder_dict.items() if k in model_dict})
depth_decoder.load_state_dict(torch.load(decoder_path))
encoder.cuda()
# encoder = torch.nn.DataParallel(encoder)
encoder.eval()
depth_decoder.cuda()
# depth_decoder = torch.nn.DataParallel(depth_decoder)
depth_decoder.eval()
with torch.no_grad():
output = depth_decoder(encoder(input_tensor))
cache = get_local.cache
print(list(cache.keys()))
attn_maps = cache['Depth_Decoder_QueryTr.forward']
# print(len(attn_maps))
print(attn_maps[0][0].shape)
raw_map = attn_maps[0][0]
# print(type(attn_maps[0][0]))
raw_map = raw_map.transpose(1, 2, 0) # visualize all features within resolution of 20x64
new_shape = (20, 64)
resized_map = cv2.resize(raw_map, new_shape)
# new_shape = (32, 10, 32) # visualize left-top local features
# resized_map = np.resize(raw_map, new_shape)
H, W, C = resized_map.shape
print(resized_map.shape)
feat_vec = resized_map.reshape(H*W, C)
# feat_vec = feat_vec.transpose(1, 0)
print(feat_vec.shape)
CKA_heatmap = CKA_vis(feat_vec, feat_vec)
fig, ax = plt.subplots(1, 1, figsize=(10,10))
fig.tight_layout()
ax.imshow(CKA_heatmap)
name = "1668829037004_x0"
# plt.colorbar()
plt.savefig('CKA_vis/CKA_{}_{}.png'.format(name ,new_shape), bbox_inches='tight', pad_inches=0)
def convert_arg_line_to_args(arg_line):
for arg in arg_line.split():
if not arg.strip():
continue
yield str(arg)
if __name__ == "__main__":
options = MonodepthOptions()
options.parser.convert_arg_line_to_args = convert_arg_line_to_args
if sys.argv.__len__() == 2:
arg_filename_with_prefix = '@' + sys.argv[1]
opt = options.parser.parse_args([arg_filename_with_prefix])
else:
opt = options.parser.parse_args()
CKA_main(opt)