Merge pull request #4 from andreped/fix-linting

Fix linting in script and notebooks; add linting for notebooks by default
AICAN-Research · Jun 26, 2024 · bfd8fda · bfd8fda
2 parents 21fc2ba + 313b503
commit bfd8fda
Show file tree

Hide file tree

Showing 6 changed files with 249 additions and 161 deletions.
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
@@ -20,7 +20,7 @@ jobs:
           python-version: "3.8"
 
       - name: Install lint dependencies
-        run: pip install wheel setuptools black==22.3.0 isort==5.10.1 flake8==4.0.1
+        run: pip install wheel setuptools isort==5.10.1 flake8==4.0.1 black==22.3.0 "black[jupyter]"
 
       - name: Lint the code
         run: sh shell/lint.sh
diff --git a/FindOptimumNumberOfClasses.py b/FindOptimumNumberOfClasses.py
@@ -1,17 +1,17 @@
 import os
+import random
+
 import numpy as np
-import tensorflow as tf
+from sklearn.metrics import silhouette_score
 from tensorflow.keras.applications import MobileNetV2
-from tensorflow.keras.preprocessing import image
 from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
-from scipy.spatial.distance import cdist
-from sklearn.metrics import silhouette_score
-import random
+from tensorflow.keras.preprocessing import image
+
 
 class ImageProcessor:
     def __init__(self, image_directory):
         self.image_directory = image_directory
-        self.model = MobileNetV2(weights='imagenet', include_top=False, pooling='avg')
+        self.model = MobileNetV2(weights="imagenet", include_top=False, pooling="avg")
 
     def load_and_preprocess_image(self, img_path):
         img = image.load_img(img_path, target_size=(224, 224))
@@ -29,6 +29,7 @@ def extract_features(self):
             filenames.append(filename)
         return np.array(features), filenames
 
+
 class GeneticAlgorithm:
     def __init__(self, population_size, generations, mutation_rate, max_clusters):
         self.population_size = population_size
@@ -37,7 +38,10 @@ def __init__(self, population_size, generations, mutation_rate, max_clusters):
         self.max_clusters = max_clusters
 
     def initialize_population(self, num_images):
-        return [np.random.randint(1, min(i + 2, self.max_clusters + 1), size=num_images) for i in range(self.population_size)]
+        return [
+            np.random.randint(1, min(i + 2, self.max_clusters + 1), size=num_images)
+            for i in range(self.population_size)
+        ]
 
     def fitness(self, individual, features):
         try:
@@ -62,7 +66,7 @@ def evolve(self, features):
         for generation in range(self.generations):
             fitness_scores = [self.fitness(ind, features) for ind in population]
             sorted_indices = np.argsort(fitness_scores)
-            best_individuals = [population[idx] for idx in sorted_indices[-(self.population_size // 2):]]
+            best_individuals = [population[idx] for idx in sorted_indices[-(self.population_size // 2) :]]
 
             next_generation = best_individuals[:]
             while len(next_generation) < self.population_size:
@@ -77,6 +81,7 @@ def evolve(self, features):
 
         return max(population, key=lambda ind: self.fitness(ind, features))
 
+
 class ImageClassifier:
     def __init__(self, image_directory, output_file):
         self.processor = ImageProcessor(image_directory)
@@ -89,11 +94,11 @@ def run(self):
         self.output_classification(optimal_classes, filenames)
 
     def output_classification(self, classes, filenames):
-        with open(self.output_file, 'w') as file:
+        with open(self.output_file, "w") as file:
             for filename, cluster in zip(filenames, classes):
                 file.write(f"{filename}, {cluster}\n")
 
 
 if __name__ == "__main__":
-    classifier = ImageClassifier('path_to_images', 'output.txt')
+    classifier = ImageClassifier("path_to_images", "output.txt")
     classifier.run()
diff --git a/README.md b/README.md
@@ -44,7 +44,7 @@ pip install -r requirements.txt
 First install linting dependencies:
 
 ```
-pip install black==22.3.0 isort==5.10.1 flake8==4.0.1
+pip install isort==5.10.1 flake8==4.0.1 black==22.3.0 "black[jupyter]"
 ```
 
 Then run linting test by:

diff --git a/notebooks/ManyShotTransferLearning.ipynb b/notebooks/ManyShotTransferLearning.ipynb
@@ -20,7 +20,7 @@
     "from tensorflow.keras.callbacks import Callback, EarlyStopping, ModelCheckpoint\n",
     "from MLD import multi_lens_distortion\n",
     "\n",
-    "os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true' \n",
+    "os.environ[\"TF_FORCE_GPU_ALLOW_GROWTH\"] = \"true\"\n",
     "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"1\"  # Select GPU"
    ]
   },
@@ -33,10 +33,11 @@
     "IMG_SIZE = (224, 224)\n",
     "IMG_SHAPE = IMG_SIZE + (3,)\n",
     "\n",
+    "\n",
     "def network_1():\n",
     "    # Load pre-trained DenseNet201 and ResNet101V2 models\n",
-    "    dense_net_full = tf.keras.applications.DenseNet201(input_shape=IMG_SHAPE, include_top=False, weights='imagenet')\n",
-    "    res_net_full = tf.keras.applications.ResNet101V2(input_shape=IMG_SHAPE, include_top=False, weights='imagenet')\n",
+    "    dense_net_full = tf.keras.applications.DenseNet201(input_shape=IMG_SHAPE, include_top=False, weights=\"imagenet\")\n",
+    "    res_net_full = tf.keras.applications.ResNet101V2(input_shape=IMG_SHAPE, include_top=False, weights=\"imagenet\")\n",
     "\n",
     "    # Create a new model with only the first 54 layers of DenseNet201\n",
     "    dense_net = tf.keras.Model(inputs=dense_net_full.input, outputs=dense_net_full.layers[178].output)\n",
@@ -60,18 +61,19 @@
     "\n",
     "    # Dense layers for classification\n",
     "    z = layers.Dropout(0.4)(concatenated)\n",
-    "    z = layers.Dense(512, activation='relu')(z)\n",
+    "    z = layers.Dense(512, activation=\"relu\")(z)\n",
     "    # z = layers.Dropout(0.2)(z)\n",
-    "    z = layers.Dense(2, activation='softmax')(z)\n",
+    "    z = layers.Dense(2, activation=\"softmax\")(z)\n",
     "\n",
     "    # Final model\n",
     "    model = Model(inputs=input, outputs=z)\n",
-    "    model.compile(optimizer=optimizers.Adam(1e-4), loss=\"CategoricalCrossentropy\", metrics=['accuracy'])\n",
+    "    model.compile(optimizer=optimizers.Adam(1e-4), loss=\"CategoricalCrossentropy\", metrics=[\"accuracy\"])\n",
     "\n",
     "    model.summary()\n",
     "    return model\n",
     "\n",
-    "model = network_1()\n"
+    "\n",
+    "model = network_1()"
    ]
   },
   {
@@ -81,8 +83,9 @@
    "outputs": [],
    "source": [
     "from os import walk\n",
-    "filenames = next(walk('./NLCB/Data3/'), (None, None, []))[2]  # [] if no file\n",
-    "filenames_val = next(walk('./NLCB/Data3/Validation/'), (None, None, []))[2]  # [] if no file"
+    "\n",
+    "filenames = next(walk(\"./NLCB/Data3/\"), (None, None, []))[2]  # [] if no file\n",
+    "filenames_val = next(walk(\"./NLCB/Data3/Validation/\"), (None, None, []))[2]  # [] if no file"
    ]
   },
   {
@@ -92,21 +95,20 @@
    "outputs": [],
    "source": [
     "def custom_data_generator(directory):\n",
-    "    for filepath in glob.glob(os.path.join(directory, '*.png')):  # assuming jpeg images\n",
+    "    for filepath in glob.glob(os.path.join(directory, \"*.png\")):  # assuming jpeg images\n",
     "        image = tf.io.read_file(filepath)\n",
     "        image = tf.image.decode_jpeg(image, channels=3)\n",
     "        label = []\n",
-    "        label[0] = 1 if filepath[4] == 'n' else 0  # Check the 5th character from the end for 'n'\n",
-    "        label[1] = 0 if filepath[4] != 'n' else 1\n",
+    "        label[0] = 1 if filepath[4] == \"n\" else 0  # Check the 5th character from the end for 'n'\n",
+    "        label[1] = 0 if filepath[4] != \"n\" else 1\n",
     "        yield image, label\n",
     "\n",
     "\n",
-    "\n",
     "def custom_preprocessing_function(img):\n",
     "\n",
-    "    if tf.random.uniform((), minval= 0, maxval=1) > 0.5:\n",
+    "    if tf.random.uniform((), minval=0, maxval=1) > 0.5:\n",
     "        nbr_rot = tf.random.uniform(shape=[], minval=1, maxval=4, dtype=tf.int32)\n",
-    "        img =tf.image.rot90(img, k=nbr_rot)\n",
+    "        img = tf.image.rot90(img, k=nbr_rot)\n",
     "\n",
     "    img = tf.image.random_hue(img, 0.08)\n",
     "    img = tf.image.random_contrast(img, 0.7, 1.3)\n",
@@ -117,20 +119,18 @@
     "    # print(img.shape)\n",
     "    # img = tf.image.random_crop(img, (int(img.shape[0]/2),int(img.shape[1]/2), 3))\n",
     "    img = tf.image.random_crop(img, (224, 224, 3))\n",
-    "    img = img/255.\n",
-    "    img = tf.image.resize(img,(224,224))\n",
-    "    img = tf.numpy_function(\n",
-    "        multi_lens_distortion, \n",
-    "        [img, 4, (80, 110), (-0.4, 0.4)],   \n",
-    "        tf.uint8\n",
-    "    )\n",
+    "    img = img / 255.0\n",
+    "    img = tf.image.resize(img, (224, 224))\n",
+    "    img = tf.numpy_function(multi_lens_distortion, [img, 4, (80, 110), (-0.4, 0.4)], tf.uint8)\n",
     "\n",
     "    return img\n",
     "\n",
+    "\n",
     "def validation_preprocessing_function(img):\n",
     "    # img = tf.image.random_crop(img, (224, 224, 3))\n",
-    "    img = img/255.\n",
-    "    img = tf.image.resize(img,(224,224))\n",
+    "    img = img / 255.0\n",
+    "    img = tf.image.resize(img, (224, 224))\n",
+    "\n",
     "\n",
     "# Paths\n",
     "train_data_dir = \"./NLCB/Data3/Training/\"\n",
@@ -142,29 +142,20 @@
     "    shear_range=0.2,\n",
     "    zoom_range=0.2,\n",
     "    horizontal_flip=True,\n",
-    "    preprocessing_function=custom_preprocessing_function  # Add more augmentations here\n",
+    "    preprocessing_function=custom_preprocessing_function,  # Add more augmentations here\n",
     ")\n",
     "\n",
     "# Create a data generator for validation data\n",
     "validation_datagen = ImageDataGenerator(preprocessing_function=validation_preprocessing_function)\n",
     "\n",
     "# Use custom data generator for training and validation datasets\n",
     "train_generator = train_datagen.flow_from_directory(\n",
-    "    train_data_dir,\n",
-    "    target_size=(224, 224),\n",
-    "    batch_size=16,\n",
-    "    class_mode='categorical',\n",
-    "    shuffle=True\n",
+    "    train_data_dir, target_size=(224, 224), batch_size=16, class_mode=\"categorical\", shuffle=True\n",
     ")\n",
     "\n",
     "validation_generator = validation_datagen.flow_from_directory(\n",
-    "    validation_data_dir,\n",
-    "    target_size=(224, 224),\n",
-    "    batch_size=16,\n",
-    "    class_mode='categorical',\n",
-    "    shuffle=False\n",
-    ")\n",
-    "\n"
+    "    validation_data_dir, target_size=(224, 224), batch_size=16, class_mode=\"categorical\", shuffle=False\n",
+    ")"
    ]
   },
   {
@@ -173,7 +164,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "model.compile(optimizer=optimizers.Adamax(1e-4), loss=\"CategoricalCrossentropy\", metrics=['accuracy'])\n",
+    "model.compile(optimizer=optimizers.Adamax(1e-4), loss=\"CategoricalCrossentropy\", metrics=[\"accuracy\"])\n",
     "\n",
     "# # considering you want to monitor accuracy:\n",
     "# acc_thresh = 0.95\n",
@@ -204,19 +195,19 @@
     "\n",
     "# Setting up callbacks for early stopping on minimum validation loss and saving the best model\n",
     "early_stopping_callback = EarlyStopping(\n",
-    "    monitor='val_loss',\n",
+    "    monitor=\"val_loss\",\n",
     "    patience=patience,\n",
     "    verbose=1,\n",
-    "    mode='min',\n",
-    "    restore_best_weights=True  # Restores model weights from the epoch with the best value of the monitored quantity.\n",
+    "    mode=\"min\",\n",
+    "    restore_best_weights=True,  # Restores model weights from the epoch with the best value of the monitored quantity.\n",
     ")\n",
     "\n",
     "model_checkpoint_callback = ModelCheckpoint(\n",
-    "    './PWCModel/best_model.h5',  # Path where the model will be saved\n",
-    "    monitor='val_loss',\n",
+    "    \"./PWCModel/best_model.h5\",  # Path where the model will be saved\n",
+    "    monitor=\"val_loss\",\n",
     "    save_best_only=True,  # Only the best model according to the validation loss is saved\n",
-    "    mode='min',\n",
-    "    verbose=1\n",
+    "    mode=\"min\",\n",
+    "    verbose=1,\n",
     ")\n",
     "\n",
     "history = model.fit(\n",
@@ -225,11 +216,11 @@
     "    validation_data=validation_generator,\n",
     "    validation_steps=len(validation_generator),\n",
     "    epochs=200,\n",
-    "    callbacks=[early_stopping_callback, model_checkpoint_callback]\n",
+    "    callbacks=[early_stopping_callback, model_checkpoint_callback],\n",
     ")\n",
     "\n",
     "# Save the overall model after training (optional, as the best model is already saved)\n",
-    "model.save('./PWCModel/best_PWC_model.h5')\n"
+    "model.save(\"./PWCModel/best_PWC_model.h5\")"
    ]
   },
   {
@@ -252,20 +243,20 @@
    "outputs": [],
    "source": [
     "# summarize history for accuracy\n",
-    "plt.plot(history.history['accuracy'])\n",
-    "plt.plot(history.history['val_accuracy'])\n",
-    "plt.title('model accuracy')\n",
-    "plt.ylabel('accuracy')\n",
-    "plt.xlabel('epoch')\n",
-    "plt.legend(['Training', 'Validation'], loc='upper left')\n",
+    "plt.plot(history.history[\"accuracy\"])\n",
+    "plt.plot(history.history[\"val_accuracy\"])\n",
+    "plt.title(\"model accuracy\")\n",
+    "plt.ylabel(\"accuracy\")\n",
+    "plt.xlabel(\"epoch\")\n",
+    "plt.legend([\"Training\", \"Validation\"], loc=\"upper left\")\n",
     "plt.show()\n",
     "# summarize history for loss\n",
-    "plt.plot(history.history['loss'])\n",
-    "plt.plot(history.history['val_loss'])\n",
-    "plt.title('model loss')\n",
-    "plt.ylabel('loss')\n",
-    "plt.xlabel('epoch')\n",
-    "plt.legend(['Training', 'Validation'], loc='upper left')\n",
+    "plt.plot(history.history[\"loss\"])\n",
+    "plt.plot(history.history[\"val_loss\"])\n",
+    "plt.title(\"model loss\")\n",
+    "plt.ylabel(\"loss\")\n",
+    "plt.xlabel(\"epoch\")\n",
+    "plt.legend([\"Training\", \"Validation\"], loc=\"upper left\")\n",
     "plt.show()"
    ]
   }