diff --git a/.gitignore b/.gitignore index 7bbc71c092..f0b4d56201 100644 --- a/.gitignore +++ b/.gitignore @@ -99,3 +99,5 @@ ENV/ # mypy .mypy_cache/ + +RUNS/ diff --git a/README.md b/README.md index 7a90e9c069..d9881d18bc 100644 --- a/README.md +++ b/README.md @@ -1 +1,32 @@ -# CarND-Semantic-Segmentation \ No newline at end of file +# Semantic Segmentation +### Introduction +In this project, you'll label the pixels of a road in images using a Fully Convolutional Network (FCN). + +### Setup +##### Frameworks and Packages +Make sure you have the following installed: + - [Python 3](https://www.python.org/) + - [TensorFlow](https://www.tensorflow.org/) + - [NumPy](http://www.numpy.org/) + - [SciPy](https://www.scipy.org/) +##### Dataset +Download the [Kitti Road dataset](http://www.cvlibs.net/datasets/kitti/eval_road.php) from [here](http://www.cvlibs.net/download.php?file=data_road.zip). Extract the dataset in the `data` folder. This will create the folder `data_road` with all the training and test images. + +### Start +##### Implement +Implement the code in the `main.py` module indicated by the "TODO" comments. +The comments indicated with "OPTIONAL" tag are not required to complete. +##### Run +Run the following command to run the project: +``` +python main.py +``` + +### Submission +1. Ensure you've passed all the unit tests. +2. Ensure you pass all points on [the rubric](https://review.udacity.com/#!/rubrics/989/view). +3. Submit the following in a zip file. 
+ - `helper.py` + - `main.py` + - `project_tests.py` + - Newest inference images from `runs` folder diff --git a/data/.gitignore b/data/.gitignore new file mode 100644 index 0000000000..e297607a5b --- /dev/null +++ b/data/.gitignore @@ -0,0 +1,5 @@ +data_road/ +vgg/ +gtFine_trainvaltest/ + +vgg16.npy diff --git a/helper.py b/helper.py new file mode 100644 index 0000000000..9bf0f28b88 --- /dev/null +++ b/helper.py @@ -0,0 +1,140 @@ +import re +import random +import numpy as np +import os.path +import scipy.misc +import shutil +import zipfile +import time +import tensorflow as tf +from glob import glob +from urllib.request import urlretrieve +from tqdm import tqdm + + +class DLProgress(tqdm): + last_block = 0 + + def hook(self, block_num=1, block_size=1, total_size=None): + self.total = total_size + self.update((block_num - self.last_block) * block_size) + self.last_block = block_num + + +def maybe_download_pretrained_vgg(data_dir): + """ + Download and extract pretrained vgg model if it doesn't exist + :param data_dir: Directory to download the model to + """ + vgg_filename = 'vgg.zip' + vgg_path = os.path.join(data_dir, 'vgg') + vgg_files = [ + os.path.join(vgg_path, 'variables/variables.data-00000-of-00001'), + os.path.join(vgg_path, 'variables/variables.index'), + os.path.join(vgg_path, 'saved_model.pb')] + + missing_vgg_files = [vgg_file for vgg_file in vgg_files if not os.path.exists(vgg_file)] + if missing_vgg_files: + # Clean vgg dir + if os.path.exists(vgg_path): + shutil.rmtree(vgg_path) + os.makedirs(vgg_path) + + # Download vgg + print('Downloading pre-trained vgg model...') + with DLProgress(unit='B', unit_scale=True, miniters=1) as pbar: + urlretrieve( + 'https://s3-us-west-1.amazonaws.com/udacity-selfdrivingcar/vgg.zip', + os.path.join(vgg_path, vgg_filename), + pbar.hook) + + # Extract vgg + print('Extracting model...') + zip_ref = zipfile.ZipFile(os.path.join(vgg_path, vgg_filename), 'r') + zip_ref.extractall(data_dir) + zip_ref.close() + + # Remove 
zip file to save space + os.remove(os.path.join(vgg_path, vgg_filename)) + + +def gen_batch_function(data_folder, image_shape): + """ + Generate function to create batches of training data + :param data_folder: Path to folder that contains all the datasets + :param image_shape: Tuple - Shape of image + :return: Function get_batches_fn(batch_size) that yields batches of training data + """ + def get_batches_fn(batch_size): + """ + Create batches of training data + :param batch_size: Batch Size + :return: Batches of training data + """ + image_paths = glob(os.path.join(data_folder, 'image_2', '*.png')) + label_paths = { + re.sub(r'_(lane|road)_', '_', os.path.basename(path)): path + for path in glob(os.path.join(data_folder, 'gt_image_2', '*_road_*.png'))} + background_color = np.array([255, 0, 0]) + + random.shuffle(image_paths) + for batch_i in range(0, len(image_paths), batch_size): + images = [] + gt_images = [] + for image_file in image_paths[batch_i:batch_i+batch_size]: + gt_image_file = label_paths[os.path.basename(image_file)] + + image = scipy.misc.imresize(scipy.misc.imread(image_file), image_shape) + gt_image = scipy.misc.imresize(scipy.misc.imread(gt_image_file), image_shape) + + gt_bg = np.all(gt_image == background_color, axis=2) + gt_bg = gt_bg.reshape(*gt_bg.shape, 1) + gt_image = np.concatenate((gt_bg, np.invert(gt_bg)), axis=2) + + images.append(image) + gt_images.append(gt_image) + + yield np.array(images), np.array(gt_images) + return get_batches_fn + + +def gen_test_output(sess, logits, keep_prob, image_pl, data_folder, image_shape): + """ + Generate test output using the test images + :param sess: TF session + :param logits: TF Tensor for the logits + :param keep_prob: TF Placeholder for the dropout keep probability + :param image_pl: TF Placeholder for the image placeholder + :param data_folder: Path to the folder that contains the datasets + :param image_shape: Tuple - Shape of image + :return: Output for each test image + """ + for image_file in glob(os.path.join(data_folder, 'image_2', '*.png')): + image = 
scipy.misc.imresize(scipy.misc.imread(image_file), image_shape) + + im_softmax = sess.run( + [tf.nn.softmax(logits)], + {keep_prob: 1.0, image_pl: [image]}) + im_softmax = im_softmax[0][:, 1].reshape(image_shape[0], image_shape[1]) + segmentation = (im_softmax > 0.5).reshape(image_shape[0], image_shape[1], 1) + mask = np.dot(segmentation, np.array([[0, 255, 0, 127]])) + mask = scipy.misc.toimage(mask, mode="RGBA") + street_im = scipy.misc.toimage(image) + street_im.paste(mask, box=None, mask=mask) + + yield os.path.basename(image_file), np.array(street_im) + + +def save_inference_samples(runs_dir, data_dir, sess, image_shape, logits, keep_prob, input_image): + # Make folder for current run + output_dir = os.path.join(runs_dir, str(time.time())) + if os.path.exists(output_dir): + shutil.rmtree(output_dir) + os.makedirs(output_dir) + + # Run NN on test images and save them to HD + print('Training Finished. Saving test images to: {}'.format(output_dir)) + image_outputs = gen_test_output( + sess, logits, keep_prob, input_image, os.path.join(data_dir, 'data_road/testing'), image_shape) + for name, image in image_outputs: + scipy.misc.imsave(os.path.join(output_dir, name), image) diff --git a/main.py b/main.py new file mode 100644 index 0000000000..2aa91b343f --- /dev/null +++ b/main.py @@ -0,0 +1,120 @@ +import os.path +import tensorflow as tf +import helper +import warnings +from distutils.version import LooseVersion +import project_tests as tests + + +# Check TensorFlow Version +assert LooseVersion(tf.__version__) >= LooseVersion('1.0'), 'Please use TensorFlow version 1.0 or newer. You are using {}'.format(tf.__version__) +print('TensorFlow Version: {}'.format(tf.__version__)) + +# Check for a GPU +if not tf.test.gpu_device_name(): + warnings.warn('No GPU found. Please use a GPU to train your neural network.') +else: + print('Default GPU Device: {}'.format(tf.test.gpu_device_name())) + + +def load_vgg(sess, vgg_path): + """ + Load Pretrained VGG Model into TensorFlow. 
+ :param sess: TensorFlow Session + :param vgg_path: Path to vgg folder, containing "variables/" and "saved_model.pb" + :return: Tuple of Tensors from VGG model (image_input, keep_prob, layer3_out, layer4_out, layer7_out) + """ + # TODO: Implement function + # Use tf.saved_model.loader.load to load the model and weights + vgg_tag = 'vgg16' + vgg_input_tensor_name = 'image_input:0' + vgg_keep_prob_tensor_name = 'keep_prob:0' + vgg_layer3_out_tensor_name = 'layer3_out:0' + vgg_layer4_out_tensor_name = 'layer4_out:0' + vgg_layer7_out_tensor_name = 'layer7_out:0' + + return None, None, None, None, None +tests.test_load_vgg(load_vgg, tf) + + +def layers(vgg_layer3_out, vgg_layer4_out, vgg_layer7_out, num_classes): + """ + Create the layers for a fully convolutional network. Build skip-layers using the vgg layers. + :param vgg_layer3_out: TF Tensor for VGG Layer 3 output + :param vgg_layer4_out: TF Tensor for VGG Layer 4 output + :param vgg_layer7_out: TF Tensor for VGG Layer 7 output + :param num_classes: Number of classes to classify + :return: The Tensor for the last layer of output + """ + # TODO: Implement function + return None +tests.test_layers(layers) + + +def optimize(nn_last_layer, correct_label, learning_rate, num_classes): + """ + Build the TensorFlow loss and optimizer operations. + :param nn_last_layer: TF Tensor of the last layer in the neural network + :param correct_label: TF Placeholder for the correct label image + :param learning_rate: TF Placeholder for the learning rate + :param num_classes: Number of classes to classify + :return: Tuple of (logits, train_op, cross_entropy_loss) + """ + # TODO: Implement function + return None, None, None +tests.test_optimize(optimize) + + +def train_nn(sess, epochs, batch_size, get_batches_fn, train_op, cross_entropy_loss, input_image, + correct_label, keep_prob, learning_rate): + """ + Train neural network and print out the loss during training. 
+ :param sess: TF Session + :param epochs: Number of epochs + :param batch_size: Batch size + :param get_batches_fn: Function to get batches of training data. Call using get_batches_fn(batch_size) + :param train_op: TF Operation to train the neural network + :param cross_entropy_loss: TF Tensor for the amount of loss + :param input_image: TF Placeholder for input images + :param correct_label: TF Placeholder for label images + :param keep_prob: TF Placeholder for dropout keep probability + :param learning_rate: TF Placeholder for learning rate + """ + # TODO: Implement function + pass +tests.test_train_nn(train_nn) + + +def run(): + num_classes = 2 + image_shape = (160, 576) + data_dir = './data' + runs_dir = './runs' + tests.test_for_kitti_dataset(data_dir) + + # Download pretrained vgg model + helper.maybe_download_pretrained_vgg(data_dir) + + # OPTIONAL: Train and Inference on the cityscapes dataset instead of the Kitti dataset. + # You'll need a GPU with at least 10 teraFLOPS to train on. 
+ # https://www.cityscapes-dataset.com/ + + with tf.Session() as sess: + # Path to vgg model + vgg_path = os.path.join(data_dir, 'vgg') + # Create function to get batches + get_batches_fn = helper.gen_batch_function(os.path.join(data_dir, 'data_road/training'), image_shape) + + # OPTIONAL: Augment Images for better results + # https://datascience.stackexchange.com/questions/5224/how-to-prepare-augment-images-for-neural-network + + # TODO: Build NN using load_vgg, layers, and optimize function + + # TODO: Train NN using the train_nn function + + # TODO: Save inference data using helper.save_inference_samples + # helper.save_inference_samples(runs_dir, data_dir, sess, image_shape, logits, keep_prob, input_image) + + +if __name__ == '__main__': + run() diff --git a/project_tests.py b/project_tests.py new file mode 100644 index 0000000000..221483b57e --- /dev/null +++ b/project_tests.py @@ -0,0 +1,154 @@ +import sys +import os +from copy import deepcopy +from glob import glob +from unittest import mock + +import numpy as np +import tensorflow as tf + + +def test_safe(func): + """ + Isolate tests + """ + def func_wrapper(*args): + with tf.Graph().as_default(): + result = func(*args) + print('Tests Passed') + return result + + return func_wrapper + + +def _prevent_print(function, params): + sys.stdout = open(os.devnull, "w") + function(**params) + sys.stdout = sys.__stdout__ + + +def _assert_tensor_shape(tensor, shape, display_name): + assert tf.assert_rank(tensor, len(shape), message='{} has wrong rank'.format(display_name)) + + tensor_shape = tensor.get_shape().as_list() if len(shape) else [] + + wrong_dimension = [ten_dim for ten_dim, cor_dim in zip(tensor_shape, shape) + if cor_dim is not None and ten_dim != cor_dim] + assert not wrong_dimension, \ + '{} has wrong shape. Found {}'.format(display_name, tensor_shape) + + +class TmpMock(object): + """ + Mock a attribute. Restore attribute when exiting scope. 
+ """ + def __init__(self, module, attrib_name): + self.original_attrib = deepcopy(getattr(module, attrib_name)) + setattr(module, attrib_name, mock.MagicMock()) + self.module = module + self.attrib_name = attrib_name + + def __enter__(self): + return getattr(self.module, self.attrib_name) + + def __exit__(self, type, value, traceback): + setattr(self.module, self.attrib_name, self.original_attrib) + + +@test_safe +def test_load_vgg(load_vgg, tf_module): + with TmpMock(tf_module.saved_model.loader, 'load') as mock_load_model: + vgg_path = '' + sess = tf.Session() + test_input_image = tf.placeholder(tf.float32, name='image_input') + test_keep_prob = tf.placeholder(tf.float32, name='keep_prob') + test_vgg_layer3_out = tf.placeholder(tf.float32, name='layer3_out') + test_vgg_layer4_out = tf.placeholder(tf.float32, name='layer4_out') + test_vgg_layer7_out = tf.placeholder(tf.float32, name='layer7_out') + + input_image, keep_prob, vgg_layer3_out, vgg_layer4_out, vgg_layer7_out = load_vgg(sess, vgg_path) + + assert mock_load_model.called, \ + 'tf.saved_model.loader.load() not called' + assert mock_load_model.call_args == mock.call(sess, ['vgg16'], vgg_path), \ + 'tf.saved_model.loader.load() called with wrong arguments.' 
+ + assert input_image == test_input_image, 'input_image is the wrong object' + assert keep_prob == test_keep_prob, 'keep_prob is the wrong object' + assert vgg_layer3_out == test_vgg_layer3_out, 'layer3_out is the wrong object' + assert vgg_layer4_out == test_vgg_layer4_out, 'layer4_out is the wrong object' + assert vgg_layer7_out == test_vgg_layer7_out, 'layer7_out is the wrong object' + + +@test_safe +def test_layers(layers): + num_classes = 2 + vgg_layer3_out = tf.placeholder(tf.float32, [None, None, None, 256]) + vgg_layer4_out = tf.placeholder(tf.float32, [None, None, None, 512]) + vgg_layer7_out = tf.placeholder(tf.float32, [None, None, None, 4096]) + layers_output = layers(vgg_layer3_out, vgg_layer4_out, vgg_layer7_out, num_classes) + + _assert_tensor_shape(layers_output, [None, None, None, num_classes], 'Layers Output') + + +@test_safe +def test_optimize(optimize): + num_classes = 2 + shape = [2, 3, 4, num_classes] + layers_output = tf.Variable(tf.zeros(shape)) + correct_label = tf.placeholder(tf.float32, [None, None, None, num_classes]) + learning_rate = tf.placeholder(tf.float32) + logits, train_op, cross_entropy_loss = optimize(layers_output, correct_label, learning_rate, num_classes) + + _assert_tensor_shape(logits, [2*3*4, num_classes], 'Logits') + + with tf.Session() as sess: + sess.run(tf.global_variables_initializer()) + sess.run([train_op], {correct_label: np.arange(np.prod(shape)).reshape(shape), learning_rate: 10}) + test, loss = sess.run([layers_output, cross_entropy_loss], {correct_label: np.arange(np.prod(shape)).reshape(shape)}) + + assert test.min() != 0 or test.max() != 0, 'Training operation not changing weights.' 
+ + +@test_safe +def test_train_nn(train_nn): + epochs = 1 + batch_size = 2 + + def get_batches_fn(batach_size_parm): + shape = [batach_size_parm, 2, 3, 3] + return np.arange(np.prod(shape)).reshape(shape) + + train_op = tf.constant(0) + cross_entropy_loss = tf.constant(10.11) + input_image = tf.placeholder(tf.float32, name='input_image') + correct_label = tf.placeholder(tf.float32, name='correct_label') + keep_prob = tf.placeholder(tf.float32, name='keep_prob') + learning_rate = tf.placeholder(tf.float32, name='learning_rate') + with tf.Session() as sess: + parameters = { + 'sess': sess, + 'epochs': epochs, + 'batch_size': batch_size, + 'get_batches_fn': get_batches_fn, + 'train_op': train_op, + 'cross_entropy_loss': cross_entropy_loss, + 'input_image': input_image, + 'correct_label': correct_label, + 'keep_prob': keep_prob, + 'learning_rate': learning_rate} + _prevent_print(train_nn, parameters) + + +@test_safe +def test_for_kitti_dataset(data_dir): + kitti_dataset_path = os.path.join(data_dir, 'data_road') + training_labels_count = len(glob(os.path.join(kitti_dataset_path, 'training/gt_image_2/*_road_*.png'))) + training_images_count = len(glob(os.path.join(kitti_dataset_path, 'training/image_2/*.png'))) + testing_images_count = len(glob(os.path.join(kitti_dataset_path, 'testing/image_2/*.png'))) + + assert not (training_images_count == training_labels_count == testing_images_count == 0),\ + 'Kitti dataset not found. Extract Kitti dataset in {}'.format(kitti_dataset_path) + assert training_images_count == 289, 'Expected 289 training images, found {} images.'.format(training_images_count) + assert training_labels_count == 289, 'Expected 289 training labels, found {} labels.'.format(training_labels_count) + assert testing_images_count == 290, 'Expected 290 testing images, found {} images.'.format(testing_images_count)