Skip to content

Commit

Permalink
feat: ✨ Introduce support for visual prompt grounding model (#568)
Browse files Browse the repository at this point in the history
  • Loading branch information
CVHub520 committed Oct 23, 2024
1 parent ba719c5 commit 8f5768b
Show file tree
Hide file tree
Showing 79 changed files with 15,858 additions and 0 deletions.
2 changes: 2 additions & 0 deletions anylabeling/configs/auto_labeling/models.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
- model_name: "open_vision-r20241010"
config_file: ":/open_vision.yaml"
- model_name: "sam2_hiera_base-r20240801"
config_file: ":/sam2_hiera_base.yaml"
- model_name: "sam2_hiera_large_video-r20240901"
Expand Down
10 changes: 10 additions & 0 deletions anylabeling/configs/auto_labeling/open_vision.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
type: open_vision
name: open_vision-r20241010
display_name: Open Vision
# Local directory (or model id) of the bert-base-uncased text encoder used
# for visual-prompt grounding. Replace the placeholder below with the actual
# checkpoint location on your machine, e.g. /data/checkpoints/bert-base-uncased.
# (A developer-local absolute path must not be committed here.)
text_encoder_type: path/to/bert-base-uncased
model_path: https://github.com/CVHub520/X-AnyLabeling/releases/download/v2.5.0/open_vision_fsc147.pth
# Minimum confidence for a grounded box to be kept.
box_threshold: 0.3
# Segment-Anything-2 (large) ONNX weights used for mask refinement.
encoder_model_path: https://github.com/CVHub520/X-AnyLabeling/releases/download/v2.4.0/sam2_hiera_large.encoder.onnx
decoder_model_path: https://github.com/CVHub520/X-AnyLabeling/releases/download/v2.4.0/sam2_hiera_large.decoder.onnx
27 changes: 27 additions & 0 deletions anylabeling/services/auto_labeling/model_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class ModelManager(QObject):

MAX_NUM_CUSTOM_MODELS = 5
CUSTOM_MODELS = [
"open_vision",
"segment_anything",
"segment_anything_2",
"segment_anything_2_video",
Expand Down Expand Up @@ -1059,6 +1060,31 @@ def _load_model(self, model_id): # noqa: C901
return
# Request next files for prediction
self.request_next_files_requested.emit()
elif model_config["type"] == "open_vision":
from .open_vision import OpenVision

try:
model_config["model"] = OpenVision(
model_config, on_message=self.new_model_status.emit
)
self.auto_segmentation_model_selected.emit()
logger.info(
f"✅ Model loaded successfully: {model_config['type']}"
)
except Exception as e: # noqa
logger.error(
f"❌ Error in loading model: {model_config['type']} with error: {str(e)}"
)
self.new_model_status.emit(
self.tr(
"Error in loading model: {error_message}".format(
error_message=str(e)
)
)
)
return
# Request next files for prediction
self.request_next_files_requested.emit()
elif model_config["type"] == "yolov5_obb":
from .yolov5_obb import YOLOv5OBB

Expand Down Expand Up @@ -1841,6 +1867,7 @@ def set_auto_labeling_marks(self, marks):
"yolov8_efficientvit_sam",
"grounding_sam",
"grounding_sam2",
"open_vision",
"edge_sam",
]
if (
Expand Down
Loading

0 comments on commit 8f5768b

Please sign in to comment.