Hi! I have a problem with the SSD300 implementation. I'm using a dataset of 1000 images: 750 of them for training and 250 for validation. My dataset has only one positive class.
My training code is the following:
```python
# Imports (as in the repository's SSD300 training notebook):
from math import ceil
from keras import backend as K
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, CSVLogger, LearningRateScheduler, TerminateOnNaN

from models.keras_ssd300 import ssd_300
from keras_loss_function.keras_ssd_loss import SSDLoss
from ssd_encoder_decoder.ssd_input_encoder import SSDInputEncoder
from data_generator.object_detection_2d_data_generator import DataGenerator
from data_generator.data_augmentation_chain_original_ssd import SSDDataAugmentation
from data_generator.object_detection_2d_geometric_ops import Resize
from data_generator.object_detection_2d_photometric_ops import ConvertTo3Channels

# Model configuration.
img_height = 300
img_width = 300
img_channels = 3
mean_color = [123, 117, 104]
swap_channels = [2, 1, 0]
n_classes = 1
scales_pascal = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05]
scales_coco = [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]
scales = scales_pascal
aspect_ratios = [[1.0, 2.0, 0.5],
                 [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                 [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                 [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                 [1.0, 2.0, 0.5],
                 [1.0, 2.0, 0.5]]
two_boxes_for_ar1 = True
steps = [8, 16, 32, 64, 100, 300]
offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
clip_boxes = False
variances = [0.1, 0.1, 0.2, 0.2]
normalize_coords = True

# Build the model in 'training' mode and load the pre-trained VGG-16 weights.
K.clear_session()
model = ssd_300(image_size=(img_height, img_width, img_channels),
                n_classes=n_classes,
                mode='training',
                l2_regularization=0.0005,
                scales=scales,
                aspect_ratios_per_layer=aspect_ratios,
                two_boxes_for_ar1=two_boxes_for_ar1,
                steps=steps,
                offsets=offsets,
                clip_boxes=clip_boxes,
                variances=variances,
                normalize_coords=normalize_coords,
                subtract_mean=mean_color,
                swap_channels=swap_channels)

weights_path = 'VGG_weights/VGG_ILSVRC_16_layers_fc_reduced.h5'
model.load_weights(weights_path, by_name=True)

adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
model.compile(optimizer=adam, loss=ssd_loss.compute_loss)

# Set up the data generators and parse the Pascal-VOC-style annotations.
train_dataset = DataGenerator(load_images_into_memory=True, hdf5_dataset_path=None)
val_dataset = DataGenerator(load_images_into_memory=True, hdf5_dataset_path=None)

myDataSet_train_images_dir = 'myDatasets/Training/JPEGImages/'
myDataSet_train_annotations_dir = 'myDatasets/Training/Annotations/'
myDataSet_trainval_image_set_filename = 'myDatasets/Training/ImageSets/Main/default.txt'
myDataSet_test_images_dir = 'myDatasets/Testing/JPEGImages/'
myDataSet_test_annotations_dir = 'myDatasets/Testing/Annotations/'
myDataSet_test_image_set_filename = 'myDatasets/Testing/ImageSets/Main/default.txt'

classes = ['background',
           'Plant']

train_dataset.parse_xml(images_dirs=[myDataSet_train_images_dir],
                        image_set_filenames=[myDataSet_trainval_image_set_filename],
                        annotations_dirs=[myDataSet_train_annotations_dir],
                        classes=classes,
                        include_classes='all',
                        exclude_truncated=False,
                        exclude_difficult=False,
                        ret=False)

val_dataset.parse_xml(images_dirs=[myDataSet_test_images_dir],
                      image_set_filenames=[myDataSet_test_image_set_filename],
                      annotations_dirs=[myDataSet_test_annotations_dir],
                      classes=classes,
                      include_classes='all',
                      exclude_truncated=False,
                      exclude_difficult=True,
                      ret=False)

batch_size = 5

ssd_data_augmentation = SSDDataAugmentation(img_height=img_height,
                                            img_width=img_width,
                                            background=mean_color)
convert_to_3_channels = ConvertTo3Channels()
resize = Resize(height=img_height, width=img_width)

# The spatial dimensions of the predictor layers are needed by the encoder.
predictor_sizes = [model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
                   model.get_layer('fc7_mbox_conf').output_shape[1:3],
                   model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
                   model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
                   model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
                   model.get_layer('conv9_2_mbox_conf').output_shape[1:3]]

ssd_input_encoder = SSDInputEncoder(img_height=img_height,
                                    img_width=img_width,
                                    n_classes=n_classes,
                                    predictor_sizes=predictor_sizes,
                                    scales=scales,
                                    aspect_ratios_per_layer=aspect_ratios,
                                    two_boxes_for_ar1=two_boxes_for_ar1,
                                    steps=steps,
                                    offsets=offsets,
                                    clip_boxes=clip_boxes,
                                    variances=variances,
                                    matching_type='multi',
                                    pos_iou_threshold=0.5,
                                    neg_iou_limit=0.5,
                                    normalize_coords=normalize_coords)

train_generator = train_dataset.generate(batch_size=batch_size,
                                         shuffle=True,
                                         transformations=[ssd_data_augmentation],
                                         label_encoder=ssd_input_encoder,
                                         returns={'processed_images',
                                                  'encoded_labels'},
                                         keep_images_without_gt=False)

val_generator = val_dataset.generate(batch_size=batch_size,
                                     shuffle=False,
                                     transformations=[convert_to_3_channels,
                                                      resize],
                                     label_encoder=ssd_input_encoder,
                                     returns={'processed_images',
                                              'encoded_labels'},
                                     keep_images_without_gt=False)

train_dataset_size = train_dataset.get_dataset_size()
val_dataset_size = val_dataset.get_dataset_size()

print("Number of images in the training dataset:\t{:>6}".format(train_dataset_size))
print("Number of images in the validation dataset:\t{:>6}".format(val_dataset_size))

def lr_schedule(epoch):
    if epoch < 300:
        return 0.0001
    elif epoch < 450:
        return 0.00001
    else:
        return 0.000001

model_checkpoint = ModelCheckpoint(filepath='ssd300_pascal_07+12_epoch-{epoch:02d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.h5',
                                   monitor='val_loss',
                                   verbose=1,
                                   save_best_only=True,
                                   save_weights_only=False,
                                   mode='auto',
                                   period=1)
csv_logger = CSVLogger(filename='ssd300_pascal_07+12_training_log.csv',
                       separator=',',
                       append=True)
learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule,
                                                verbose=1)
terminate_on_nan = TerminateOnNaN()

callbacks = [model_checkpoint,
             csv_logger,
             learning_rate_scheduler,
             terminate_on_nan]

initial_epoch = 0
final_epoch = 1000
steps_per_epoch = 1000

history = model.fit_generator(generator=train_generator,
                              steps_per_epoch=steps_per_epoch,
                              epochs=final_epoch,
                              callbacks=callbacks,
                              validation_data=val_generator,
                              validation_steps=ceil(val_dataset_size/batch_size),
                              initial_epoch=initial_epoch)
```
The inference code is the following:
```python
# Imports (as in the repository's SSD300 inference notebook):
import sys
import numpy as np
from matplotlib import pyplot as plt
from imageio import imread
from keras import backend as K
from keras.models import load_model
from keras.preprocessing import image

from keras_loss_function.keras_ssd_loss import SSDLoss
from keras_layers.keras_layer_AnchorBoxes import AnchorBoxes
from keras_layers.keras_layer_DecodeDetections import DecodeDetections
from keras_layers.keras_layer_L2Normalization import L2Normalization

img_height = 300
img_width = 300

# Load the trained model. The custom layers and the loss function have to be
# passed via custom_objects because they are not part of core Keras.
model_path = 'ssd300_pascal_07+12_epoch-180_loss-3.5966_val_loss-3.3306.h5'
ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)
K.clear_session()
model = load_model(model_path, custom_objects={'AnchorBoxes': AnchorBoxes,
                                               'L2Normalization': L2Normalization,
                                               'DecodeDetections': DecodeDetections,
                                               'compute_loss': ssd_loss.compute_loss})

orig_images = []   # The image in its original size, for plotting.
input_images = []  # The resized 300x300 image that is fed to the network.

img_path = 'myDatasets/Testing/JPEGImages/scene00371.png'
orig_images.append(imread(img_path))
img = image.load_img(img_path, target_size=(img_height, img_width))
img = image.img_to_array(img)
input_images.append(img)
input_images = np.array(input_images)

y_pred = model.predict(input_images)

# Keep only boxes whose confidence exceeds the threshold.
confidence_threshold = 0.25
y_pred_thresh = [y_pred[k][y_pred[k,:,1] > confidence_threshold] for k in range(y_pred.shape[0])]

np.set_printoptions(precision=2, suppress=True, linewidth=90, threshold=sys.maxsize)
print("Predicted boxes:\n")
print('class conf xmin ymin xmax ymax')
print(y_pred_thresh[0])

colors = plt.cm.hsv(np.linspace(0, 1, 2)).tolist()
classes = ['background',
           'Plant']

plt.figure(figsize=(20,12))
plt.imshow(orig_images[0])
current_axis = plt.gca()

for box in y_pred_thresh[0]:
    # Transform the box coordinates from the 300x300 network input
    # back to the original image size.
    xmin = box[2] * orig_images[0].shape[1] / img_width
    ymin = box[3] * orig_images[0].shape[0] / img_height
    xmax = box[4] * orig_images[0].shape[1] / img_width
    ymax = box[5] * orig_images[0].shape[0] / img_height
    color = colors[round(box[0])]
    label = '{}: {:.2f}'.format(classes[round(box[0])], box[1])
    current_axis.add_patch(plt.Rectangle((xmin, ymin), xmax-xmin, ymax-ymin, color=color, fill=False, linewidth=2))
    current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={'facecolor':color, 'alpha':1.0})

plt.show()
```
That's the output from the network:
An example from the image annotation XML file:
I really don't know why this isn't working; I only made parameter-tuning changes to the original code.
I think this is related to the difference between the training and inference modes.

The predictions are correct, but they are scaled between 0 and 1 on both the x and y axes, while the image has kept its original size. As a result, all the predicted boxes end up close to the point (0, 0), i.e. the top-left corner of the image. You should make sure the predictions and the image are on the same scale.

As a workaround, you can decode the predictions from y_pred with decode_detections instead of doing it manually. Make sure to import decode_detections first: `from ssd_encoder_decoder.ssd_output_decoder import decode_detections`.
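For reference, a minimal sketch of that workaround (it assumes the `decode_detections` signature from this repository and reuses the `confidence_threshold`, `img_height`, and `img_width` values from the inference script above; the IoU threshold is a placeholder):

```python
from ssd_encoder_decoder.ssd_output_decoder import decode_detections

# Decode the raw 'training'-mode output. With normalize_coords=True and the
# model's input size given, the normalized coordinates are converted back
# into absolute pixel coordinates in the 300x300 input frame.
y_pred_decoded = decode_detections(y_pred,
                                   confidence_thresh=0.25,
                                   iou_threshold=0.45,
                                   top_k=200,
                                   normalize_coords=True,
                                   img_height=img_height,
                                   img_width=img_width)

print("Predicted boxes:\n")
print('class conf xmin ymin xmax ymax')
print(y_pred_decoded[0])
```

The plotting loop can then be used unchanged on `y_pred_decoded[0]` instead of `y_pred_thresh[0]`, since it already rescales the boxes from the 300x300 input frame to the original image size.

Alternatively, since this is exactly the training-vs-inference difference mentioned above, you could rebuild the model with `mode='inference'` and load your trained weights into it; in that mode the `DecodeDetections` layer performs the decoding inside the graph, so `model.predict()` returns decoded boxes directly. A sketch, reusing the parameter values from your training script:

```python
from keras import backend as K
from models.keras_ssd300 import ssd_300

K.clear_session()
model = ssd_300(image_size=(300, 300, 3),
                n_classes=1,
                mode='inference',  # adds the DecodeDetections layer to the graph
                l2_regularization=0.0005,
                scales=[0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05],
                aspect_ratios_per_layer=[[1.0, 2.0, 0.5],
                                         [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                                         [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                                         [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                                         [1.0, 2.0, 0.5],
                                         [1.0, 2.0, 0.5]],
                two_boxes_for_ar1=True,
                steps=[8, 16, 32, 64, 100, 300],
                offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
                clip_boxes=False,
                variances=[0.1, 0.1, 0.2, 0.2],
                normalize_coords=True,
                subtract_mean=[123, 117, 104],
                swap_channels=[2, 1, 0],
                confidence_thresh=0.25,
                iou_threshold=0.45,
                top_k=200,
                nms_max_output_size=400)

model.load_weights('ssd300_pascal_07+12_epoch-180_loss-3.5966_val_loss-3.3306.h5', by_name=True)
```

Note that in 'inference' mode the decoded coordinates are still absolute pixel values with respect to the 300x300 model input, so the rescaling to the original image size in your plotting loop still applies.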