Add Semantic Segmentation Project

mvpcom · Jul 3, 2017 · ffdcad4 · ffdcad4
1 parent 2a396d2
commit ffdcad4
Show file tree

Hide file tree

Showing 6 changed files with 453 additions and 1 deletion.
diff --git a/.gitignore b/.gitignore
@@ -99,3 +99,5 @@ ENV/
 
 # mypy
 .mypy_cache/
+
+runs/
diff --git a/README.md b/README.md
@@ -1 +1,32 @@
-# CarND-Semantic-Segmentation
+# Semantic Segmentation
+### Introduction
+In this project, you'll label the pixels of a road in images using a Fully Convolutional Network (FCN).
+
+### Setup
+##### Frameworks and Packages
+Make sure you have the following installed:
+ - [Python 3](https://www.python.org/)
+ - [TensorFlow](https://www.tensorflow.org/)
+ - [NumPy](http://www.numpy.org/)
+ - [SciPy](https://www.scipy.org/)
+##### Dataset
+Download the [Kitti Road dataset](http://www.cvlibs.net/datasets/kitti/eval_road.php) from [here](http://www.cvlibs.net/download.php?file=data_road.zip).  Extract the dataset in the `data` folder.  This will create the folder `data_road` with all the training and test images.
+
+### Start
+##### Implement
+Implement the code in the `main.py` module indicated by the "TODO" comments.
+Comments marked with the "OPTIONAL" tag are not required to complete the project.
+##### Run
+Run the following command to run the project:
+```
+python main.py
+```
+
+### Submission
+1. Ensure you've passed all the unit tests.
+2. Ensure you pass all points on [the rubric](https://review.udacity.com/#!/rubrics/989/view).
+3. Submit the following in a zip file.
+ - `helper.py`
+ - `main.py`
+ - `project_tests.py`
+ - Newest inference images from `runs` folder
diff --git a/data/.gitignore b/data/.gitignore
@@ -0,0 +1,5 @@
+data_road/
+vgg/
+gtFine_trainvaltest/
+
+vgg16.npy
diff --git a/helper.py b/helper.py
@@ -0,0 +1,140 @@
+import re
+import random
+import numpy as np
+import os.path
+import scipy.misc
+import shutil
+import zipfile
+import time
+import tensorflow as tf
+from glob import glob
+from urllib.request import urlretrieve
+from tqdm import tqdm
+
+
+class DLProgress(tqdm):
+    last_block = 0
+
+    def hook(self, block_num=1, block_size=1, total_size=None):
+        self.total = total_size
+        self.update((block_num - self.last_block) * block_size)
+        self.last_block = block_num
+
+
+def maybe_download_pretrained_vgg(data_dir):
+    """
+    Download and extract pretrained vgg model if it doesn't exist
+    :param data_dir: Directory to download the model to
+    """
+    vgg_filename = 'vgg.zip'
+    vgg_path = os.path.join(data_dir, 'vgg')
+    vgg_files = [
+        os.path.join(vgg_path, 'variables/variables.data-00000-of-00001'),
+        os.path.join(vgg_path, 'variables/variables.index'),
+        os.path.join(vgg_path, 'saved_model.pb')]
+
+    missing_vgg_files = [vgg_file for vgg_file in vgg_files if not os.path.exists(vgg_file)]
+    if missing_vgg_files:
+        # Clean vgg dir
+        if os.path.exists(vgg_path):
+            shutil.rmtree(vgg_path)
+        os.makedirs(vgg_path)
+
+        # Download vgg
+        print('Downloading pre-trained vgg model...')
+        with DLProgress(unit='B', unit_scale=True, miniters=1) as pbar:
+            urlretrieve(
+                'https://s3-us-west-1.amazonaws.com/udacity-selfdrivingcar/vgg.zip',
+                os.path.join(vgg_path, vgg_filename),
+                pbar.hook)
+
+        # Extract vgg
+        print('Extracting model...')
+        zip_ref = zipfile.ZipFile(os.path.join(vgg_path, vgg_filename), 'r')
+        zip_ref.extractall(data_dir)
+        zip_ref.close()
+
+        # Remove zip file to save space
+        os.remove(os.path.join(vgg_path, vgg_filename))
+
+
+def gen_batch_function(data_folder, image_shape):
+    """
+    Generate function to create batches of training data
+    :param data_folder: Path to folder that contains all the datasets
+    :param image_shape: Tuple - Shape of image
+    :return: Function get_batches_fn(batch_size) that yields batches of (images, ground truth labels)
+    """
+    def get_batches_fn(batch_size):
+        """
+        Create batches of training data
+        :param batch_size: Batch Size
+        :return: Batches of training data
+        """
+        image_paths = glob(os.path.join(data_folder, 'image_2', '*.png'))
+        label_paths = {
+            re.sub(r'_(lane|road)_', '_', os.path.basename(path)): path
+            for path in glob(os.path.join(data_folder, 'gt_image_2', '*_road_*.png'))}
+        background_color = np.array([255, 0, 0])
+
+        random.shuffle(image_paths)
+        for batch_i in range(0, len(image_paths), batch_size):
+            images = []
+            gt_images = []
+            for image_file in image_paths[batch_i:batch_i+batch_size]:
+                gt_image_file = label_paths[os.path.basename(image_file)]
+
+                image = scipy.misc.imresize(scipy.misc.imread(image_file), image_shape)
+                gt_image = scipy.misc.imresize(scipy.misc.imread(gt_image_file), image_shape)
+
+                gt_bg = np.all(gt_image == background_color, axis=2)
+                gt_bg = gt_bg.reshape(*gt_bg.shape, 1)
+                gt_image = np.concatenate((gt_bg, np.invert(gt_bg)), axis=2)
+
+                images.append(image)
+                gt_images.append(gt_image)
+
+            yield np.array(images), np.array(gt_images)
+    return get_batches_fn
+
+
+def gen_test_output(sess, logits, keep_prob, image_pl, data_folder, image_shape):
+    """
+    Generate test output using the test images
+    :param sess: TF session
+    :param logits: TF Tensor for the logits
+    :param keep_prob: TF Placeholder for the dropout keep probability
+    :param image_pl: TF Placeholder for the input image
+    :param data_folder: Path to the folder that contains the datasets
+    :param image_shape: Tuple - Shape of image
+    :return: Output for each test image
+    """
+    for image_file in glob(os.path.join(data_folder, 'image_2', '*.png')):
+        image = scipy.misc.imresize(scipy.misc.imread(image_file), image_shape)
+
+        im_softmax = sess.run(
+            [tf.nn.softmax(logits)],
+            {keep_prob: 1.0, image_pl: [image]})
+        im_softmax = im_softmax[0][:, 1].reshape(image_shape[0], image_shape[1])
+        segmentation = (im_softmax > 0.5).reshape(image_shape[0], image_shape[1], 1)
+        mask = np.dot(segmentation, np.array([[0, 255, 0, 127]]))
+        mask = scipy.misc.toimage(mask, mode="RGBA")
+        street_im = scipy.misc.toimage(image)
+        street_im.paste(mask, box=None, mask=mask)
+
+        yield os.path.basename(image_file), np.array(street_im)
+
+
+def save_inference_samples(runs_dir, data_dir, sess, image_shape, logits, keep_prob, input_image):
+    # Make folder for current run
+    output_dir = os.path.join(runs_dir, str(time.time()))
+    if os.path.exists(output_dir):
+        shutil.rmtree(output_dir)
+    os.makedirs(output_dir)
+
+    # Run NN on test images and save them to HD
+    print('Training Finished. Saving test images to: {}'.format(output_dir))
+    image_outputs = gen_test_output(
+        sess, logits, keep_prob, input_image, os.path.join(data_dir, 'data_road/testing'), image_shape)
+    for name, image in image_outputs:
+        scipy.misc.imsave(os.path.join(output_dir, name), image)
diff --git a/main.py b/main.py
@@ -0,0 +1,120 @@
+import os.path
+import tensorflow as tf
+import helper
+import warnings
+from distutils.version import LooseVersion
+import project_tests as tests
+
+
+# Check TensorFlow Version
+assert LooseVersion(tf.__version__) >= LooseVersion('1.0'), 'Please use TensorFlow version 1.0 or newer.  You are using {}'.format(tf.__version__)
+print('TensorFlow Version: {}'.format(tf.__version__))
+
+# Check for a GPU
+if not tf.test.gpu_device_name():
+    warnings.warn('No GPU found. Please use a GPU to train your neural network.')
+else:
+    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))
+
+
+def load_vgg(sess, vgg_path):
+    """
+    Load Pretrained VGG Model into TensorFlow.
+    :param sess: TensorFlow Session
+    :param vgg_path: Path to vgg folder, containing "variables/" and "saved_model.pb"
+    :return: Tuple of Tensors from VGG model (image_input, keep_prob, layer3_out, layer4_out, layer7_out)
+    """
+    # TODO: Implement function
+    #   Use tf.saved_model.loader.load to load the model and weights
+    vgg_tag = 'vgg16'
+    vgg_input_tensor_name = 'image_input:0'
+    vgg_keep_prob_tensor_name = 'keep_prob:0'
+    vgg_layer3_out_tensor_name = 'layer3_out:0'
+    vgg_layer4_out_tensor_name = 'layer4_out:0'
+    vgg_layer7_out_tensor_name = 'layer7_out:0'
+
+    return None, None, None, None, None
+tests.test_load_vgg(load_vgg, tf)
+
+
+def layers(vgg_layer3_out, vgg_layer4_out, vgg_layer7_out, num_classes):
+    """
+    Create the layers for a fully convolutional network.  Build skip-layers using the vgg layers.
+    :param vgg_layer3_out: TF Tensor for VGG Layer 3 output
+    :param vgg_layer4_out: TF Tensor for VGG Layer 4 output
+    :param vgg_layer7_out: TF Tensor for VGG Layer 7 output
+    :param num_classes: Number of classes to classify
+    :return: The Tensor for the last layer of output
+    """
+    # TODO: Implement function
+    return None
+tests.test_layers(layers)
+
+
+def optimize(nn_last_layer, correct_label, learning_rate, num_classes):
+    """
+    Build the TensorFlow loss and optimizer operations.
+    :param nn_last_layer: TF Tensor of the last layer in the neural network
+    :param correct_label: TF Placeholder for the correct label image
+    :param learning_rate: TF Placeholder for the learning rate
+    :param num_classes: Number of classes to classify
+    :return: Tuple of (logits, train_op, cross_entropy_loss)
+    """
+    # TODO: Implement function
+    return None, None, None
+tests.test_optimize(optimize)
+
+
+def train_nn(sess, epochs, batch_size, get_batches_fn, train_op, cross_entropy_loss, input_image,
+             correct_label, keep_prob, learning_rate):
+    """
+    Train neural network and print out the loss during training.
+    :param sess: TF Session
+    :param epochs: Number of epochs
+    :param batch_size: Batch size
+    :param get_batches_fn: Function to get batches of training data.  Call using get_batches_fn(batch_size)
+    :param train_op: TF Operation to train the neural network
+    :param cross_entropy_loss: TF Tensor for the amount of loss
+    :param input_image: TF Placeholder for input images
+    :param correct_label: TF Placeholder for label images
+    :param keep_prob: TF Placeholder for dropout keep probability
+    :param learning_rate: TF Placeholder for learning rate
+    """
+    # TODO: Implement function
+    pass
+tests.test_train_nn(train_nn)
+
+
+def run():
+    num_classes = 2
+    image_shape = (160, 576)
+    data_dir = './data'
+    runs_dir = './runs'
+    tests.test_for_kitti_dataset(data_dir)
+
+    # Download pretrained vgg model
+    helper.maybe_download_pretrained_vgg(data_dir)
+
+    # OPTIONAL: Train and Inference on the cityscapes dataset instead of the Kitti dataset.
+    # You'll need a GPU with at least 10 teraFLOPS to train on.
+    #  https://www.cityscapes-dataset.com/
+
+    with tf.Session() as sess:
+        # Path to vgg model
+        vgg_path = os.path.join(data_dir, 'vgg')
+        # Create function to get batches
+        get_batches_fn = helper.gen_batch_function(os.path.join(data_dir, 'data_road/training'), image_shape)
+
+        # OPTIONAL: Augment Images for better results
+        #  https://datascience.stackexchange.com/questions/5224/how-to-prepare-augment-images-for-neural-network
+
+        # TODO: Build NN using load_vgg, layers, and optimize function
+
+        # TODO: Train NN using the train_nn function
+
+        # TODO: Save inference data using helper.save_inference_samples
+        #  helper.save_inference_samples(runs_dir, data_dir, sess, image_shape, logits, keep_prob, input_image)
+
+
+if __name__ == '__main__':
+    run()