Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

YOLOv11 #62

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 28 additions & 13 deletions scripts/download_datasets.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,37 @@ do
l) location=${OPTARG};;
esac
done

# Default values for the CLI options parsed by the getopts loop above.
format=${format:-coco}
location=${location:-$(pwd)/rf100}

echo "Starting downloading RF100 in parallel..."

# Create the datasets directory if it doesn't exist
mkdir -p "$location"

# Download one dataset if it is not already present locally.
# Arguments:
#   $1 - Roboflow dataset link
#   $2 - export format (e.g. coco, yolov11)
#   $3 - destination directory that holds all datasets
# Outputs: progress messages on stdout
# Returns: non-zero if the link cannot be parsed
download_single_dataset() {
    local link=$1
    local format=$2
    local location=$3
    local attributes project version

    # parse_dataset_link.py emits whitespace-separated attributes;
    # fields 3 and 4 are the project slug and version.
    attributes=$(python3 "$(pwd)/scripts/parse_dataset_link.py" -l "$link") || return 1
    project=$(echo "$attributes" | cut -d' ' -f 3)
    version=$(echo "$attributes" | cut -d' ' -f 4)

    if [ ! -d "$location/$project" ]; then
        echo "Downloading dataset $project..."
        python3 "$(pwd)/scripts/download_dataset.py" -p "$project" -v "$version" -l "$location" -f "$format"
    else
        echo "Dataset $project already exists. Skipping download."
    fi
}

# Expose the worker function to the child shells spawned by xargs.
export -f download_single_dataset

# Download up to 20 datasets concurrently. The link, format and location are
# passed as argv to the child shell — never spliced into the command string —
# so shell metacharacters in a link cannot be interpreted or executed.
xargs -P20 -I{} bash -c 'download_single_dataset "$1" "$2" "$3"' _ {} "$format" "$location" < "$input"

echo "Done downloading datasets!"
99 changes: 99 additions & 0 deletions yolov11-benchmark/final_eval.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
4-fold-defect 0.954
abdomen-mri 0.962
acl-x-ray 0.995
activity-diagrams-qdobr 0.630
aerial-cows 0.771
aerial-pool 0.743
aerial-spheres 0.994
animals-ij5d2 0.837
apex-videogame 0.829
apples-fvpl5 0.778
aquarium-qlnqy 0.746
asbestos 0.623
avatar-recognition-nuexe 0.855
axial-mri 0.491
bacteria-ptywi 0.761
bccd-ouzjz 0.912
bees-jt5in 0.882
bone-fracture-7fylg 0.294
brain-tumor-m2pbp 0.820
cable-damage 0.914
cables-nl42k 0.758
cavity-rs0uf 0.755
cell-towers 0.930
cells-uyemf 0.890
chess-pieces-mjzgj 0.986
circuit-elements 0.127
circuit-voltages 0.829
cloud-types 0.329
coins-1apki 0.855
construction-safety-gsnvb 0.895
coral-lwptl 0.154
corrosion-bi3q3 0.737
cotton-20xz5 0.304
cotton-plant-disease 0.180
csgo-videogame 0.970
currency-v4f8j 0.979
document-parts 0.684
excavators-czvg9 0.921
farcry6-videogame 0.557
fish-market-ggjso 0.993
flir-camera-objects 0.787
furniture-ngpea 0.987
gauge-u2lwv 0.673
grass-weeds 0.770
gynecology-mri 0.209
halo-infinite-angel-videogame 0.879
hand-gestures-jps7z 0.995
insects-mytwu 0.913
leaf-disease-nsdsr 0.600
lettuce-pallets 0.942
liver-disease 0.560
marbles 0.986
mask-wearing-608pr 0.850
mitosis-gjs3g 0.934
number-ops 0.992
paper-parts 0.787
paragraphs-co84b 0.577
parasites-1s07h 0.876
peanuts-sd4kf 0.995
peixos-fish 0.803
people-in-paintings 0.533
pests-2xlvx 0.171
phages 0.846
pills-sxdht 0.903
poker-cards-cxcvz 0.995
printed-circuit-board 0.346
radio-signal 0.711
road-signs-6ih4y 0.966
road-traffic 0.779
robomasters-285km 0.775
secondary-chains 0.328
sedimentary-features-9eosf 0.432
shark-teeth-5atku 0.938
sign-language-sokdr 0.935
signatures-xc8up 0.894
smoke-uvylj 0.959
soccer-players-5fuqs 0.961
soda-bottles 0.958
solar-panels-taxvb 0.845
stomata-cells 0.845
street-work 0.673
tabular-data-wf9uh 0.783
team-fight-tactics 0.968
thermal-cheetah-my4dp 0.867
thermal-dogs-and-people-x6ejw 0.977
trail-camera 0.966
truck-movement 0.851
tweeter-posts 0.846
tweeter-profile 0.986
underwater-objects-5v7p8 0.661
underwater-pipes-4ng4t 0.995
uno-deck 0.994
valentines-chocolate 0.982
vehicles-q0x2v 0.442
wall-damage 0.569
washroom-rf1fa 0.610
weed-crop-aerial 0.761
wine-labels 0.630
x-ray-rheumatology 0.842
48 changes: 48 additions & 0 deletions yolov11-benchmark/parse_eval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/usr/bin/env python3
"""Append the final-epoch mAP@0.5 of a YOLO training run to an eval file.

Reads <results_dir>/results.csv (as written by Ultralytics), takes the last
row (the final epoch), extracts the 'metrics/mAP50(B)' column and appends
"<dataset> <map50>" to the output file.
"""
import argparse
import os
import sys

import pandas as pd


def extract_map50(results_dir):
    """Return the final-epoch mAP@0.5 from results_dir/results.csv,
    formatted to 3 decimal places.

    Exits with status 1 (after printing a diagnostic) if the CSV or the
    expected column is missing.
    """
    results_csv = os.path.join(results_dir, "results.csv")

    # Check if results.csv exists
    if not os.path.isfile(results_csv):
        print(f"results.csv not found in {results_dir}")
        sys.exit(1)

    df = pd.read_csv(results_csv)

    # Ultralytics pads column headers with spaces; normalize them.
    df.columns = df.columns.str.strip()

    # The last row holds the metrics of the final epoch.
    final_epoch = df.iloc[-1]

    # Print available columns for debugging
    print("Available columns in results.csv:")
    print(df.columns.tolist())

    try:
        map50 = final_epoch['metrics/mAP50(B)']
    except KeyError:
        print("Column 'metrics/mAP50(B)' not found in results.csv.")
        print("Available columns are:", df.columns.tolist())
        sys.exit(1)

    # Format the mAP value to 3 decimal places
    return f"{map50:.3f}"


def main():
    ap = argparse.ArgumentParser()
    ap.add_argument("-d", "--dataset", required=True, help="Dataset name")
    ap.add_argument("-r", "--results_dir", required=True, help="Directory containing results.csv")
    ap.add_argument("-o", "--output", required=True, help="Output file to write")
    args = ap.parse_args()

    map50_formatted = extract_map50(args.results_dir)

    # Append so successive datasets accumulate in a single eval file.
    with open(args.output, "a") as f:
        f.write(f"{args.dataset} {map50_formatted}\n")

    print(f"Dataset: {args.dataset}, mAP@0.5: {map50_formatted}")


if __name__ == "__main__":
    main()
121 changes: 121 additions & 0 deletions yolov11-benchmark/train.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
#!/bin/bash
# Benchmark YOLOv11 on the RF100 datasets: download the data, install
# dependencies, fetch the model, then fan training jobs out across GPUs.
# NOTE(review): strict mode is intentionally off — the per-dataset training
# loop is best-effort; re-enable selectively if desired.
# set -euo pipefail

# Store the current directory
SCRIPT_DIR="$(pwd)"

# Set the datasets directory to ./rf100
datasets_dir="$SCRIPT_DIR/rf100"
echo "Datasets directory: $datasets_dir"

# Ensure the directory exists before probing it: `ls -A` on a missing path
# would fail and the download branch below would be skipped.
mkdir -p "$datasets_dir"

# Download datasets if not present
if [ -z "$(ls -A "$datasets_dir")" ]; then
    echo "Downloading datasets..."
    chmod +x "$SCRIPT_DIR/scripts/download_datasets.sh"
    "$SCRIPT_DIR/scripts/download_datasets.sh" -l "$datasets_dir" -f yolov11
fi

# Prepare the results directory
dir="$SCRIPT_DIR/runs/yolo-v11"
mkdir -p "$dir"
if [ ! -f "$dir/final_eval.txt" ]; then
    touch "$dir/final_eval.txt"
fi

cd "$SCRIPT_DIR/yolov11-benchmark/" || exit 1

# Install dependencies if needed
echo "Installing dependencies..."
# Comment out apt-get commands if you don't have root permissions
# sudo apt-get update && sudo apt-get install -y libfreetype6-dev

pip install --user git+https://github.com/ultralytics/ultralytics.git
echo "Dependencies installed."

# Download the model file (-nc: no-op if it is already present)
echo "Downloading model file..."
wget -nc https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s.pt
echo "Model file downloaded."

# Set the model path
model_path="$SCRIPT_DIR/yolov11-benchmark/yolo11s.pt"

# Verify that the model file exists
if [ ! -f "$model_path" ]; then
    echo "Model file not found at $model_path. Exiting." >&2
    exit 1
fi

# Get list of datasets. nullglob makes an empty directory expand to an empty
# array instead of the literal pattern, so the zero-dataset check can fire.
shopt -s nullglob
datasets=("$datasets_dir"/*)
shopt -u nullglob
num_datasets=${#datasets[@]}
echo "Number of datasets found: $num_datasets"

if [ "$num_datasets" -eq 0 ]; then
    echo "No datasets found in $datasets_dir. Exiting." >&2
    exit 1
fi

num_gpus=8          # Number of GPUs
max_jobs_per_gpu=4  # Number of concurrent jobs allowed per GPU

# Train and evaluate one dataset, pinned to the first free (GPU, slot) pair.
# Slot ownership is serialized with flock(1) on per-slot lock files in /tmp.
# Arguments:
#   $1 - path to the dataset directory
# Globals (read): num_gpus, max_jobs_per_gpu, dir, model_path, SCRIPT_DIR
train_dataset() {
    local dataset="$1"
    local gpu_id slot_id lock_file lock_fd dataset_name results_dir

    # Spin until a slot is acquired; each GPU exposes max_jobs_per_gpu slots.
    while true; do
        for ((gpu_id=0; gpu_id<num_gpus; gpu_id++)); do
            for ((slot_id=0; slot_id<max_jobs_per_gpu; slot_id++)); do
                lock_file="/tmp/gpu_lock_${gpu_id}_${slot_id}"
                exec {lock_fd}>"$lock_file" || continue
                if flock -n "$lock_fd"; then
                    # Acquired the lock for this GPU and slot
                    echo "Assigned GPU $gpu_id (slot $slot_id) to dataset $dataset"
                    dataset_name=$(basename "$dataset")
                    results_dir="$dir/$dataset_name"

                    if [ ! -f "$results_dir/train/weights/best.pt" ]; then
                        yolo detect train data="$dataset/data.yaml" model="$model_path" epochs=100 batch=16 device="$gpu_id" project="$results_dir" name=train

                        yolo detect val data="$dataset/data.yaml" model="$results_dir/train/weights/best.pt" device="$gpu_id" project="$results_dir" name=val

                        python3 "$SCRIPT_DIR/yolov11-benchmark/parse_eval.py" -d "$dataset_name" -r "$results_dir/train" -o "$dir/final_eval.txt"
                    else
                        echo "Results for $dataset already exist. Skipping training."
                    fi

                    # Release the slot. Do NOT unlink the lock file: another
                    # process may already hold an fd on it, and removing the
                    # path would let two jobs lock different inodes of the
                    # same name and share a slot.
                    flock -u "$lock_fd"
                    exec {lock_fd}>&-

                    return 0
                else
                    # Slot is busy — close our fd and try the next one.
                    exec {lock_fd}>&-
                fi
            done
        done
        # Every slot is busy; wait before scanning again.
        sleep 5
    done
}

# Make the worker function and the globals it reads visible to child shells.
export -f train_dataset
export model_path dir SCRIPT_DIR max_jobs_per_gpu num_gpus

# Launch one background worker per dataset; the per-GPU slot locks inside
# train_dataset throttle how many actually run at once.
for dataset in "${datasets[@]}"; do
    train_dataset "$dataset" &
done

# Barrier: block until every worker has exited.
wait

echo "Done training all the datasets with YOLOv11!"