losses_helper.py
import tensorflow as tf
import numpy as np
import lmbspecialops as sops
import math
# loss value ranges around 0.01 to 0.1
def photoconsistency_loss(img, predicted_flow, weight=7, typee='forward', summary_type='_train'):
    with tf.variable_scope('photoconsistency_loss_' + typee):
        img1, img2 = get_separate_rgb_images(img)
        predicted_flow = denormalize_flow(predicted_flow)
        if typee != 'forward':
            # backward flow: warp img1 and compare it against img2
            warped_img = flow_warp(img1, predicted_flow)
            img2 = get_occlusion_aware_image(img2, warped_img)
            img2 = tf.stop_gradient(img2)
            pc_loss = endpoint_loss(img2, warped_img, weight, 'pc_loss_backward', summary_type=summary_type)
        else:
            # forward flow: warp img2 and compare it against img1
            warped_img = flow_warp(img2, predicted_flow)
            img1 = get_occlusion_aware_image(img1, warped_img)
            img1 = tf.stop_gradient(img1)
            pc_loss = endpoint_loss(img1, warped_img, weight, 'pc_loss_forward', summary_type=summary_type)
        # pc_loss = tf.Print(pc_loss, [pc_loss], 'pc_loss: ')
        # tf.losses.compute_weighted_loss(pc_loss, weights=weight)
        # tf.summary.scalar('pc_loss', sops.replace_nonfinite(pc_loss))
        return pc_loss
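
# Usage sketch (assumption: `img` packs both frames as 8 channels, RGB + depth
# per frame, see get_separate_rgb_images below; the flows are network outputs
# in normalized units):
#   pc_fwd = photoconsistency_loss(img, flow_fwd, weight=7, typee='forward')
#   pc_bwd = photoconsistency_loss(img, flow_bwd, weight=7, typee='backward')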

# scale a normalized flow prediction back to pixel units:
# u is normalized by the image width, v by the image height
def denormalize_flow(flow):
    flow_shape = flow.get_shape().as_list()
    u = flow[:, :, :, 0] * flow_shape[2]
    v = flow[:, :, :, 1] * flow_shape[1]
    u = tf.expand_dims(u, axis=-1)
    v = tf.expand_dims(v, axis=-1)
    return tf.concat([u, v], axis=-1)
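
# Usage sketch (assumption: the network outputs normalized flow):
#   pixel_flow = denormalize_flow(net(images))  # [N, H, W, 2] in pixel units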

def forward_backward_loss(predicted_flow_forward, predicted_flow_backward, name='ref1', weight=1):
    with tf.variable_scope('fb_loss'):
        '''
        The steps are:
        1) Use meshgrid to generate the pixel positions in the X and Y directions.
        2) Add the forward flow values to the meshgrid X,Y positions; then we get the
           resulting flow field, which we call A.
        3) Warp the backward flow field with A (by warp we mean the resampler), passing
           the backward flow as arg1 and A as arg2. The flow field now points backwards;
           this gives us B. (Steps 1-3 happen inside flow_warp.)
        4) Now simply compute flow_forward + B to find the difference between the forward
           and backward flow fields (B points backwards, so the two should cancel).
        5) We have to minimize this difference (i.e. the loss) towards 0.
        '''
        flow_forward = sops.replace_nonfinite(predicted_flow_forward)
        flow_backward = sops.replace_nonfinite(predicted_flow_backward)
        # flow_forward = tf.Print(flow_forward, [flow_forward], 'flow_forward: ')
        # flow_backward = tf.Print(flow_backward, [flow_backward], 'flow_backward: ')
        flow_forward = tf.check_numerics(flow_forward, 'flow_forward NaN value found')
        flow_backward = tf.check_numerics(flow_backward, 'flow_backward NaN value found')
        flow_forward_denormed = denormalize_flow(flow_forward)
        # tf.summary.image('flow_forward_u_loss', tf.expand_dims(flow_forward[:, :, :, 0], axis=-1))
        # tf.summary.image('flow_forward_v_loss', tf.expand_dims(flow_forward[:, :, :, 1], axis=-1))
        # tf.summary.image('flow_backward_u_loss', tf.expand_dims(flow_backward[:, :, :, 0], axis=-1))
        # tf.summary.image('flow_backward_v_loss', tf.expand_dims(flow_backward[:, :, :, 1], axis=-1))
        # steps 1, 2, 3
        B = sops.replace_nonfinite(flow_warp(flow_backward, flow_forward_denormed))
        # B = get_occlusion_aware_image(flow_forward, B)
        tf.summary.image('flow_backward_warped_u_loss' + name, tf.expand_dims(B[:, :, :, 0], axis=-1))
        tf.summary.image('flow_backward_warped_v_loss' + name, tf.expand_dims(B[:, :, :, 1], axis=-1))
        # step 4: endpoint_loss(-B, F) penalizes F - (-B) = F + B
        fb_loss = sops.replace_nonfinite(endpoint_loss(-B, flow_forward, weight, 'fb_loss', False))
        # tf.losses.compute_weighted_loss(fb_loss, weights=weight)
        return fb_loss
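
# Usage sketch (hypothetical `net`; one pass per direction, each producing a
# normalized flow):
#   flow_fwd = net(tf.concat([img1, img2], axis=-1))
#   flow_bwd = net(tf.concat([img2, img1], axis=-1))
#   fb = forward_backward_loss(flow_fwd, flow_bwd, name='ref1', weight=1)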

# loss value ranges around 0.01 to 2.0
# defined here :: https://arxiv.org/pdf/1702.02295.pdf
def endpoint_loss(gt_flow, predicted_flow, weight=500, scope='epe_loss', stop_grad=False, summary_type='_train'):
    with tf.variable_scope(scope):
        if not stop_grad:
            # treat the ground truth as a constant
            gt_flow = tf.stop_gradient(gt_flow)
        # get u & v values for gt
        gt_u = tf.slice(gt_flow, [0, 0, 0, 0], [-1, -1, -1, 1])
        gt_v = tf.slice(gt_flow, [0, 0, 0, 1], [-1, -1, -1, 1])
        # get u & v values for predicted_flow
        pred_u = tf.slice(predicted_flow, [0, 0, 0, 0], [-1, -1, -1, 1])
        pred_v = tf.slice(predicted_flow, [0, 0, 0, 1], [-1, -1, -1, 1])
        diff_u = sops.replace_nonfinite(gt_u - pred_u)
        diff_v = sops.replace_nonfinite(gt_v - pred_v)
        # endpoint error: sqrt((u_gt - u)^2 + (v_gt - v)^2); the epsilon keeps the sqrt gradient finite
        epe_loss = tf.sqrt((diff_u ** 2) + (diff_v ** 2) + 1e-6)
        epe_loss = tf.reduce_mean(sops.replace_nonfinite(epe_loss))
        epe_loss = tf.check_numerics(epe_loss, 'epe_loss NaN value found')
        # epe_loss = tf.Print(epe_loss, [epe_loss], 'epe_loss: ')
        if summary_type == '_test':
            tf.summary.scalar('weighted_epe_loss' + summary_type, epe_loss * weight)
        else:
            tf.losses.compute_weighted_loss(epe_loss, weights=weight)
        return epe_loss
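
# Usage sketch (assumption: the ground-truth flow is stored in pixel units):
#   epe = endpoint_loss(gt_flow, denormalize_flow(predicted_flow), weight=500)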

def depth_consistency_loss(img, predicted_optflow_uv, weight=10):
    with tf.variable_scope('depth_consistency_loss'):
        img1_depth, img2_depth = get_separate_depth_images(img)
        img2_depth = tf.expand_dims(img2_depth, axis=3)
        # returns a single-channel depth image warped with the uv optical flow
        warped_depth_img = flow_warp(img2_depth, predicted_optflow_uv[:, :, :, 0:2])
        # loss = w - Z_1(x+u, y+v) + Z_0(x, y)
        dc_loss = predicted_optflow_uv[:, :, :, 2] - warped_depth_img[:, :, :, 0] + img1_depth
        dc_loss = tf.reduce_mean(dc_loss)
        tf.summary.scalar('dc_loss', sops.replace_nonfinite(dc_loss))
        # tf.losses.compute_weighted_loss(dc_loss, weights=weight)
        return dc_loss

# taken from DEMON Network
def scale_invariant_gradient(inp, deltas, weights, epsilon=0.001):
    """Computes the scale invariant gradient images
    inp: Tensor
    deltas: list of int
        The pixel delta for the difference.
        This list must be the same length as weights.
    weights: list of float
        The weight factor for each difference.
        This list must be the same length as deltas.
    epsilon: float
        epsilon value for avoiding division by zero
    """
    with tf.variable_scope('scale_inv_images'):
        inp = tf.transpose(inp, [0, 3, 1, 2])
        assert len(deltas) == len(weights)
        sig_images = []
        for delta, weight in zip(deltas, weights):
            sig_images.append(sops.scale_invariant_gradient(inp, deltas=[delta], weights=[weight], epsilon=epsilon))
        return tf.concat(sig_images, axis=1)

# loss value ranges around 80 to 100
# taken from DEMON Network
def scale_invariant_gradient_loss(inp, gt, epsilon, decay_steps, global_step, weight=100):
    """Computes the scale invariant gradient loss
    inp: Tensor
        Tensor with the scale invariant gradient images computed on the prediction
    gt: Tensor
        Tensor with the scale invariant gradient images computed on the ground truth
    epsilon: float
        epsilon value for avoiding division by zero
    """
    with tf.variable_scope('scale_invariant_gradient_loss'):
        num_channels_inp = inp.get_shape().as_list()[1]
        num_channels_gt = gt.get_shape().as_list()[1]
        assert num_channels_inp % 2 == 0
        assert num_channels_inp == num_channels_gt
        tmp = []
        for i in range(num_channels_inp // 2):
            tmp.append(pointwise_l2_loss(inp[:, i * 2:i * 2 + 2, :, :], gt[:, i * 2:i * 2 + 2, :, :], epsilon))
        tmp = tf.add_n(tmp)
        # weight_increase_rate = tf.train.polynomial_decay(FLAGS.SIGL_START_LEARNING_RATE, global_step,
        #                                                  decay_steps, FLAGS.SIGL_END_LEARNING_RATE,
        #                                                  power=FLAGS.SIGL_POWER)
        # tf.summary.scalar('weight_increase_rate', weight)
        # tmp = tf.Print(tmp, [tmp], 'sig_loss: ')
        tf.losses.compute_weighted_loss(tmp, weights=weight)
        return tmp
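
# Usage sketch (the deltas/weights values here are assumptions; DEMON uses
# several pixel deltas; decay_steps/global_step are only read by the
# commented-out weight schedule above):
#   sig_pred = scale_invariant_gradient(pred_flow, deltas=[1, 2, 4], weights=[1, 1, 1])
#   sig_gt   = scale_invariant_gradient(gt_flow,   deltas=[1, 2, 4], weights=[1, 1, 1])
#   sig_loss = scale_invariant_gradient_loss(sig_pred, sig_gt, epsilon=0.001,
#                                            decay_steps=None, global_step=None)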

# taken from DEMON Network
def pointwise_l2_loss(inp, gt, epsilon, data_format='NCHW'):
    """Computes the pointwise unsquared l2 loss.
    The input tensors must use the format NCHW.
    This loss ignores nan values.
    The loss is normalized by the number of pixels.
    inp: Tensor
        This is the prediction.
    gt: Tensor
        The ground truth with the same shape as 'inp'
    epsilon: float
        The epsilon value to avoid division by zero in the gradient computation
    """
    with tf.name_scope('pointwise_l2_loss'):
        gt_ = tf.stop_gradient(gt)
        diff = sops.replace_nonfinite(inp - gt_)
        if data_format == 'NCHW':
            return tf.reduce_mean(tf.sqrt(tf.reduce_sum(diff ** 2, axis=1) + epsilon))
        else:  # NHWC
            return tf.reduce_mean(tf.sqrt(tf.reduce_sum(diff ** 2, axis=3) + epsilon))

# returns an image with all the occluded pixel values set to 0
def get_occlusion_aware_image(img, warped_img):
    # warped_img / warped_img is 1.0 where the warped image is non-zero and
    # NaN (0/0) where the resampler produced zeros (the source pixel fell
    # outside the image); replace_nonfinite turns the NaNs into 0, giving a mask
    masked_img = warped_img / warped_img
    masked_img = sops.replace_nonfinite(masked_img)
    return masked_img * img
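
# Usage sketch (caveat: genuinely black pixels in the warped image get masked
# out too):
#   warped = flow_warp(img2, pixel_flow)
#   target = get_occlusion_aware_image(img1, warped)  # img1 with occluded pixels zeroed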

# factorU = scales the optical flow U component by the factor with which the image width is resized
# factorV = scales the optical flow V component by the factor with which the image height is resized
# size = the size to which the original ground-truth label is resized
# gt_flow = ground truth flow label
# resize gt_flow to the size of predict_flow4 for minimizing the loss after the encoder (before the decoder)
def downsample_label(gt_flow, size=[224, 384], factorU=0.5, factorV=0.5):
    gt_u = tf.slice(gt_flow, [0, 0, 0, 0], [-1, -1, -1, 1])
    gt_v = tf.slice(gt_flow, [0, 0, 0, 1], [-1, -1, -1, 1])
    # gt_w = tf.slice(gt_flow, [0, 0, 0, 2], [-1, -1, -1, 1])
    # since we reduce the image size, the pixel-unit flow values must shrink by the same factor
    gt_u = gt_u * factorU
    gt_v = gt_v * factorV
    # for the depth-change channel w, the average of the width and height factors could be used
    # gt_w = gt_w * factorW
    gt_u = tf.image.resize_images(gt_u, size, method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    gt_v = tf.image.resize_images(gt_v, size, method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    # gt_w = tf.image.resize_images(gt_w, size, method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    return tf.concat([gt_u, gt_v], axis=-1)
    # return tf.concat([gt_u, gt_v, gt_w], axis=-1)
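
# Usage sketch (assumption: labels in pixel units at 224x384; resizing to a
# 28x48 prediction shrinks both axes by 1/8, so the flow shrinks by 1/8 too):
#   gt4 = downsample_label(gt_flow, size=[28, 48], factorU=1.0 / 8, factorV=1.0 / 8)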

# the 8-channel input packs two frames: channels 0-2 = RGB of frame 1,
# channel 3 = depth of frame 1, channels 4-6 = RGB of frame 2, channel 7 = depth of frame 2
def get_separate_rgb_images(img):
    return img[:, :, :, 0:3], img[:, :, :, 4:7]

def get_separate_depth_images(img):
    return img[:, :, :, 3], img[:, :, :, 7]

# warp an image with a pixel-unit flow field: sample img at (x + u, y + v)
def flow_warp(img, flow):
    # returns a tensor with the same shape as img
    input_size = img.get_shape().as_list()
    x = list(range(0, input_size[2]))
    y = list(range(0, input_size[1]))
    X, Y = tf.meshgrid(x, y)
    X = tf.cast(X, tf.float32)
    Y = tf.cast(Y, tf.float32)
    # target sampling coordinates: the pixel grid shifted by the flow
    X = X + flow[:, :, :, 0]
    Y = Y + flow[:, :, :, 1]
    con = tf.stack([X, Y])
    result = tf.transpose(con, [1, 2, 3, 0])
    # bilinear sampling of img at the computed coordinates
    return tf.contrib.resampler.resampler(img, result)
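
# Usage sketch: warp the second frame towards the first with a pixel-unit flow
# (samples that fall outside the image come back as zeros, which is what the
# occlusion mask above relies on):
#   warped_img2 = flow_warp(img2, denormalize_flow(predicted_flow))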

def gan_loss(fake_flow_d, real_flow_d, conv_real, conv_fake, weight=10, summary_type='_train'):
    # conv_real / conv_fake are only used by the commented-out feature-matching variant
    EPS = 1e-12
    with tf.variable_scope('generator_loss'):
        g_total_loss = sops.replace_nonfinite(tf.reduce_mean(-tf.log(fake_flow_d + EPS)))
        # g_total_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=conv_fake, labels=tf.ones_like(conv_real)))
        # tf.losses.compute_weighted_loss(g_total_loss, weights=1)
    with tf.variable_scope('discriminator_loss'):
        d_total_loss = sops.replace_nonfinite(tf.reduce_mean(-(tf.log(real_flow_d + EPS) + tf.log(1 - fake_flow_d + EPS))))
        # d_total_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=conv_real, labels=tf.ones_like(conv_real)))
        # d_total_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=conv_fake, labels=tf.zeros_like(conv_real)))
        # d_total_loss = d_total_loss_fake + d_total_loss_real
        # d_total_loss = sops.replace_nonfinite(d_total_loss)
        # feature_matching_loss = endpoint_loss(conv_real, conv_fake, weight=1, scope='feature_matching_loss')
        # tf.summary.scalar('feature_matching_loss', feature_matching_loss)
        # d_total_loss += feature_matching_loss
        # tf.add_to_collection('disc_loss', feature_matching_loss)
        # tf.add_to_collection('disc_loss', d_total_loss)
        # tf.summary.scalar('disc_loss' + summary_type, d_total_loss)
    return g_total_loss, d_total_loss
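
# Usage sketch (hypothetical discriminator `D` returning a sigmoid probability
# plus an intermediate feature map):
#   d_real, conv_real = D(real_flow)
#   d_fake, conv_fake = D(fake_flow)
#   g_loss, d_loss = gan_loss(d_fake, d_real, conv_real, conv_fake, weight=10)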

def ease_in_quad(current_time, start_value, change_value, duration, starter, name="ease_in_quad"):
    """
    current_time: float or Tensor
        The current time
    start_value: float or Tensor
        The start value
    change_value: float or Tensor
        The value change over the duration. The final value is
        start_value + change_value
    duration: float or Tensor
        The duration
    starter: float or Tensor
        The time at which the ease-in begins; current_time is shifted by it
    Returns the value for the current time
    """
    current_time = current_time - starter
    with tf.name_scope(name):
        t = tf.clip_by_value(current_time / duration, 0, 1)
        result = tf.to_float(change_value * t * t + start_value)
        tf.summary.scalar('ease_in_quad', result)
        return result
# _depth_sig_weight_factor = ease_in_quad(trainer.global_step(), 0, 1, 10 * _k)  # note: also needs the `starter` argument

# numpy counterpart of tf.train.polynomial_decay
def get_learning_rate(global_step, learning_rate, end_learning_rate, decay_steps, power):
    decayed_learning_rate = (learning_rate - end_learning_rate) * np.power((1.0 - float(global_step) / decay_steps), power) + end_learning_rate
    return decayed_learning_rate
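
# Usage sketch:
#   lr = get_learning_rate(global_step=5000, learning_rate=1e-4,
#                          end_learning_rate=1e-6, decay_steps=100000, power=2)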