
Commit

Bump version to v2.4.2

CVHub520 committed Sep 6, 2024
1 parent e72444a commit d9302d6
Showing 28 changed files with 14,935 additions and 14,814 deletions.
2 changes: 2 additions & 0 deletions README.md
@@ -34,6 +34,7 @@
## 🥳 What's New

- Sep. 2024:
- Release version [2.4.2](https://github.com/CVHub520/X-AnyLabeling/releases/tag/v2.4.2)
- 🔥🔥🔥 Added support for interactive video object tracking based on [Segment-Anything-2](https://github.com/CVHub520/segment-anything-2). [[Tutorial](examples/interactive_video_object_segmentation/README.md)]

<br>
@@ -199,6 +200,7 @@ For more details, please refer to 👉 [model_zoo](./docs/en/model_zoo.md) 👈
- [Tracking by OBB Object Detection](./examples/multiple_object_tracking/README.md)
- [Tracking by Instance Segmentation](./examples/multiple_object_tracking/README.md)
- [Tracking by Pose Estimation](./examples/multiple_object_tracking/README.md)
- [iVOS](./examples/interactive_video_object_segmentation/README.md)

## Contact

5 changes: 3 additions & 2 deletions README_zh-CN.md
@@ -33,7 +33,8 @@
## 🥳 What's New

- Sep. 2024:
- 🔥🔥🔥 Added support for interactive video object tracking based on [Segment-Anything-2](https://github.com/CVHub520/segment-anything-2). [[Tutorial](examples/interactive_video_object_segmentation/README.md)]
- Release version [X-AnyLabeling v2.4.2](https://github.com/CVHub520/X-AnyLabeling/releases/tag/v2.4.2).
- 🔥🔥🔥 Added support for interactive video object tracking based on [Segment-Anything-2](https://github.com/CVHub520/segment-anything-2). [[Tutorial](examples/interactive_video_object_segmentation/README.md) | [Bilibili](https://www.bilibili.com/video/BV1kMpwedE8M/)]

<br>

@@ -202,7 +203,7 @@
- [Tracking by OBB Object Detection](./examples/multiple_object_tracking/README.md)
- [Tracking by Instance Segmentation](./examples/multiple_object_tracking/README.md)
- [Tracking by Pose Estimation](./examples/multiple_object_tracking/README.md)

- [iVOS](./examples/interactive_video_object_segmentation/README.md)

## Contact

2 changes: 1 addition & 1 deletion anylabeling/app_info.py
@@ -1,4 +1,4 @@
__appname__ = "X-AnyLabeling"
__appdescription__ = "Advanced Auto Labeling Solution with Added Features"
__version__ = "2.4.1"
__version__ = "2.4.2"
__preferred_device__ = "CPU" # GPU or CPU
28,252 changes: 14,126 additions & 14,126 deletions anylabeling/resources/resources.py

Large diffs are not rendered by default.

534 changes: 267 additions & 267 deletions anylabeling/resources/translations/en_US.ts

Large diffs are not rendered by default.

534 changes: 267 additions & 267 deletions anylabeling/resources/translations/zh_CN.ts

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion anylabeling/services/auto_labeling/__base__/ram.py
@@ -116,7 +116,9 @@ def predict_shapes(self, image, image_path=None):
outs = self.inference(blob)
tags = self.postprocess(outs)
description = self.get_results(tags)
result = AutoLabelingResult(shapes=[], replace=False, description=description)
result = AutoLabelingResult(
shapes=[], replace=False, description=description
)
return result

@staticmethod
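Context for this hunk: RAM is a tagging model, so `predict_shapes` returns no geometry; the recognized tags travel in the `description` field of `AutoLabelingResult` while `shapes` stays empty. A minimal sketch of that pattern — the `AutoLabelingResult(shapes=[], replace=False, description=...)` call mirrors the diff above, but the import path and helper are assumptions:

```python
# Sketch only: mirrors the AutoLabelingResult call shown in the diff above;
# the import path and the joining format are assumptions.
from anylabeling.services.auto_labeling.types import AutoLabelingResult


def tags_to_result(tags):
    # A tagging model emits text rather than shapes, so shapes stays empty
    # and replace=False leaves any existing annotations untouched.
    return AutoLabelingResult(
        shapes=[], replace=False, description=" | ".join(tags)
    )
```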
4 changes: 3 additions & 1 deletion anylabeling/services/auto_labeling/internimage_cls.py
@@ -116,7 +116,9 @@ def predict_shapes(self, image, image_path=None):
blob = self.preprocess(image)
predictions = self.net.get_ort_inference(blob, extract=False)
label = self.postprocess(predictions)
result = AutoLabelingResult(shapes=[], replace=False, description=label)
result = AutoLabelingResult(
shapes=[], replace=False, description=label
)
return result

def unload(self):
1 change: 1 addition & 0 deletions anylabeling/services/auto_labeling/lru_cache.py
@@ -1,4 +1,5 @@
"""Thread-safe LRU cache implementation."""

from collections import OrderedDict
import threading

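The diff only adds a blank line after the module docstring, but for orientation: a thread-safe LRU cache of this kind is typically an `OrderedDict` guarded by a lock, matching the two imports shown above. A minimal sketch (illustrative, not the exact X-AnyLabeling implementation):

```python
from collections import OrderedDict
import threading


class LRUCache:
    """Minimal thread-safe LRU cache sketch."""

    def __init__(self, capacity):
        self._data = OrderedDict()
        self._lock = threading.Lock()
        self._capacity = capacity

    def get(self, key, default=None):
        with self._lock:
            if key not in self._data:
                return default
            # Mark as most recently used.
            self._data.move_to_end(key)
            return self._data[key]

    def put(self, key, value):
        with self._lock:
            self._data[key] = value
            self._data.move_to_end(key)
            if len(self._data) > self._capacity:
                # Evict the least recently used entry.
                self._data.popitem(last=False)
```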
25 changes: 17 additions & 8 deletions anylabeling/services/auto_labeling/model_manager.py
@@ -971,6 +971,7 @@ def _load_model(self, model_id):
elif model_config["type"] == "segment_anything_2_video":
try:
from .segment_anything_2_video import SegmentAnything2Video

model_config["model"] = SegmentAnything2Video(
model_config, on_message=self.new_model_status.emit
)
@@ -1633,22 +1634,22 @@ def set_auto_labeling_preserve_existing_annotations_state(self, state):
].set_auto_labeling_preserve_existing_annotations_state(state)

def set_auto_labeling_prompt(self):
model_list = ['segment_anything_2_video']
model_list = ["segment_anything_2_video"]
if (
self.loaded_model_config is not None
and self.loaded_model_config["type"] in model_list
):
self.loaded_model_config[
"model"
].set_auto_labeling_prompt()
self.loaded_model_config["model"].set_auto_labeling_prompt()

def unload_model(self):
"""Unload model"""
if self.loaded_model_config is not None:
self.loaded_model_config["model"].unload()
self.loaded_model_config = None

def predict_shapes(self, image, filename=None, text_prompt=None, run_tracker=False):
def predict_shapes(
self, image, filename=None, text_prompt=None, run_tracker=False
):
"""Predict shapes.
NOTE: This function is blocking. The model can take a long time to
predict. So it is recommended to use predict_shapes_threading instead.
@@ -1686,7 +1687,9 @@ def predict_shapes(self, image, filename=None, text_prompt=None, run_tracker=False):
self.prediction_finished.emit()

@pyqtSlot()
def predict_shapes_threading(self, image, filename=None, text_prompt=None, run_tracker=False):
def predict_shapes_threading(
self, image, filename=None, text_prompt=None, run_tracker=False
):
"""Predict shapes.
This function starts a thread to run the prediction.
"""
@@ -1717,11 +1720,17 @@ def predict_shapes_threading(self, image, filename=None, text_prompt=None, run_tracker=False):
self.model_execution_thread = QThread()
if text_prompt is not None:
self.model_execution_worker = GenericWorker(
self.predict_shapes, image, filename, text_prompt=text_prompt
self.predict_shapes,
image,
filename,
text_prompt=text_prompt,
)
elif run_tracker is True:
self.model_execution_worker = GenericWorker(
self.predict_shapes, image, filename, run_tracker=run_tracker
self.predict_shapes,
image,
filename,
run_tracker=run_tracker,
)
else:
self.model_execution_worker = GenericWorker(
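The threading hunks above all follow the same Qt worker-object pattern: `predict_shapes_threading` wraps the blocking `predict_shapes` call in a `GenericWorker`, moves it onto a `QThread`, and lets signals drive teardown. A minimal sketch of that pattern, assuming a `GenericWorker` along these lines (the real class lives elsewhere in the codebase and may differ):

```python
from PyQt5.QtCore import QObject, QThread, pyqtSignal


class GenericWorker(QObject):
    """Sketch of a worker that runs one callable on a background thread."""

    finished = pyqtSignal()

    def __init__(self, func, *args, **kwargs):
        super().__init__()
        self.func = func
        self.args = args
        self.kwargs = kwargs

    def run(self):
        # Runs on the worker thread; emit finished so the caller can quit
        # and dispose of the QThread afterwards.
        self.func(*self.args, **self.kwargs)
        self.finished.emit()


# Wiring, mirroring predict_shapes_threading (illustrative):
#   worker = GenericWorker(manager.predict_shapes, image, filename,
#                          run_tracker=True)
#   thread = QThread()
#   worker.moveToThread(thread)
#   thread.started.connect(worker.run)
#   worker.finished.connect(thread.quit)
#   thread.start()
```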
97 changes: 59 additions & 38 deletions anylabeling/services/auto_labeling/segment_anything_2_video.py
@@ -3,7 +3,8 @@
import traceback

import warnings
warnings.filterwarnings('ignore')

warnings.filterwarnings("ignore")

import cv2
import numpy as np
@@ -32,6 +33,7 @@ class SegmentAnything2Video(Model):

class Meta:
"""Meta class to define required configurations and UI elements."""

required_config_names = [
"type",
"name",
@@ -69,7 +71,7 @@ def __init__(self, config_path, on_message) -> None:
torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()

if torch.cuda.get_device_properties(0).major >= 8:
# turn on tfloat32 for Ampere GPUs
# turn on tfloat32 for Ampere GPUs
# (https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices)
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
@@ -78,19 +80,19 @@ def __init__(self, config_path, on_message) -> None:
self.model_abs_path = self.get_model_abs_path(
self.config, "model_path"
)
if not self.model_abs_path or not os.path.isfile(
self.model_abs_path
):
if not self.model_abs_path or not os.path.isfile(self.model_abs_path):
raise FileNotFoundError(
QCoreApplication.translate(
"Model",
"Could not download or initialize model of Segment Anything 2.",
)
)
self.model_cfg = self.config['model_cfg']
self.model_cfg = self.config["model_cfg"]
sam2_image_model = build_sam2(self.model_cfg, self.model_abs_path)
self.image_predictor = SAM2ImagePredictor(sam2_image_model)
self.video_predictor = build_sam2_camera_predictor(self.model_cfg, self.model_abs_path)
self.video_predictor = build_sam2_camera_predictor(
self.model_cfg, self.model_abs_path
)
self.is_first_init = True

# Initialize marking and prompting structures
Expand All @@ -111,7 +113,9 @@ def set_auto_labeling_reset_tracker(self):
if self.prompts:
try:
self.video_predictor.reset_state()
print(f'Successful: The tracker has been reset to its initial state.')
print(
f"Successful: The tracker has been reset to its initial state."
)
except Exception as e: # noqa
pass
self.prompts = []
Expand All @@ -121,33 +125,33 @@ def set_auto_labeling_prompt(self):
point_coords, point_labels, box = self.marks_to_prompts()
if box:
promot = {
'type': 'rectangle',
'data': np.array([[*box[:2]], [*box[2:]]], dtype=np.float32)
"type": "rectangle",
"data": np.array([[*box[:2]], [*box[2:]]], dtype=np.float32),
}
self.prompts.append(promot)
elif (point_coords and point_labels):
elif point_coords and point_labels:
promot = {
'type': 'point',
'data': {
'point_coords': np.array(point_coords, dtype=np.float32),
'point_labels': np.array(point_labels, dtype=np.int32),
}
"type": "point",
"data": {
"point_coords": np.array(point_coords, dtype=np.float32),
"point_labels": np.array(point_labels, dtype=np.int32),
},
}
self.prompts.append(promot)

def marks_to_prompts(self):
"""Convert marks to prompts for the model."""
point_coords, point_labels, box = None, None, None
for marks in self.marks:
if marks['type'] == 'rectangle':
box = marks['data']
elif marks['type'] == 'point':
if marks["type"] == "rectangle":
box = marks["data"]
elif marks["type"] == "point":
if point_coords is None and point_labels is None:
point_coords = [marks['data']]
point_labels = [marks['label']]
point_coords = [marks["data"]]
point_labels = [marks["label"]]
else:
point_coords.append(marks['data'])
point_labels.append(marks['label'])
point_coords.append(marks["data"])
point_labels.append(marks["label"])
return point_coords, point_labels, box

def post_process(self, masks, label=None):
@@ -162,7 +166,7 @@
"""
# Convert masks to binary format
masks[masks > 0.0] = 255
masks[masks <= 0.] = 0
masks[masks <= 0.0] = 0
masks = masks.astype(np.uint8)

# Find contours of the masks
@@ -302,25 +306,39 @@ def video_process(self, cv_image, filename):
if not self.prompts:
return [], False

if not any(filename.endswith(ext) for ext in [".jpg", ".jpeg", ".JPG", ".JPEG"]):
if not any(
filename.endswith(ext)
for ext in [".jpg", ".jpeg", ".JPG", ".JPEG"]
):
print(f"Only JPEG format is supported, but got {filename}")
return [], False

if self.is_first_init:
self.video_predictor.load_first_frame(cv_image)
ann_frame_idx = 0
for i, prompt in enumerate(self.prompts):
ann_obj_id = i + 1 # give a unique id to each object we interact with (it can be any integers)
if prompt['type'] == 'rectangle':
bbox = prompt['data']
_, out_obj_ids, out_mask_logits = self.video_predictor.add_new_prompt(
frame_idx=ann_frame_idx, obj_id=ann_obj_id, bbox=bbox
ann_obj_id = (
i + 1
) # give a unique id to each object we interact with (it can be any integers)
if prompt["type"] == "rectangle":
bbox = prompt["data"]
_, out_obj_ids, out_mask_logits = (
self.video_predictor.add_new_prompt(
frame_idx=ann_frame_idx,
obj_id=ann_obj_id,
bbox=bbox,
)
)
elif prompt['type'] == 'point':
points = prompt['data']['point_coords']
labels = prompt['data']['point_labels']
_, out_obj_ids, out_mask_logits = self.video_predictor.add_new_prompt(
frame_idx=ann_frame_idx, obj_id=ann_obj_id, points=points, labels=labels
elif prompt["type"] == "point":
points = prompt["data"]["point_coords"]
labels = prompt["data"]["point_labels"]
_, out_obj_ids, out_mask_logits = (
self.video_predictor.add_new_prompt(
frame_idx=ann_frame_idx,
obj_id=ann_obj_id,
points=points,
labels=labels,
)
)
self.is_first_init = False
return [], False
Expand All @@ -333,10 +351,12 @@ def video_process(self, cv_image, filename):
masks = masks[0][0]
else:
masks = masks[0]
shapes.extend(self.post_process(masks, label=f'object{i}'))
shapes.extend(self.post_process(masks, label=f"object{i}"))
return shapes, True

def predict_shapes(self, image, filename=None, run_tracker=False) -> AutoLabelingResult:
def predict_shapes(
self, image, filename=None, run_tracker=False
) -> AutoLabelingResult:
"""Predict shapes from an image or video frame.
Args:
@@ -378,7 +398,8 @@ def get_ann_frame_idx(filename):
int: The index of the frame in the sorted list of frames, or -1 if not found.
"""
frame_names = [
p for p in os.listdir(os.path.dirname(filename))
p
for p in os.listdir(os.path.dirname(filename))
if os.path.splitext(p)[-1] in [".jpg", ".jpeg", ".JPG", ".JPEG"]
]
if not frame_names:
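End to end, this file implements a simple tracking protocol: UI marks become prompt dicts (`rectangle` boxes or labeled `point` sets), the first JPEG frame seeds the camera predictor, and later frames are propagated and post-processed into shapes. A condensed sketch of that flow; `load_first_frame` and `add_new_prompt` appear in the diff above, while the `track()` call, the import path, and the config/checkpoint names are assumptions:

```python
import cv2
import numpy as np
from sam2.build_sam import build_sam2_camera_predictor  # assumed import path

# Assumed config/checkpoint names; X-AnyLabeling reads them from model_cfg
# and the downloaded model_path instead.
predictor = build_sam2_camera_predictor(
    "sam2_hiera_l.yaml", "sam2_hiera_large.pt"
)

frame_paths = ["frames/0001.jpg", "frames/0002.jpg", "frames/0003.jpg"]
first = cv2.imread(frame_paths[0])

# First frame: register prompts once, as video_process does while
# is_first_init is True. obj_id just needs to be unique per object.
predictor.load_first_frame(first)
box = np.array([[100, 100], [300, 300]], dtype=np.float32)
_, obj_ids, mask_logits = predictor.add_new_prompt(
    frame_idx=0, obj_id=1, bbox=box
)

# Later frames: propagate only. Logits > 0 are foreground, matching the
# masks[masks > 0.0] = 255 binarization in post_process.
for path in frame_paths[1:]:
    frame = cv2.imread(path)
    obj_ids, mask_logits = predictor.track(frame)
    binary = (mask_logits[0][0] > 0.0).cpu().numpy().astype(np.uint8) * 255
```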
@@ -164,9 +164,11 @@ def _create_object_prediction_list_from_original_predictions(
object_prediction_list = [
ObjectPrediction(
bbox=box.tolist() if mask is None else None,
bool_mask=mask.detach().cpu().numpy()
if mask is not None
else None,
bool_mask=(
mask.detach().cpu().numpy()
if mask is not None
else None
),
category_id=category_id.item(),
category_name=self.category_mapping[
str(category_id.item())
@@ -409,11 +409,11 @@ def __call__(
self.match_metric,
self.match_threshold,
):
object_prediction_list[
keep_ind
] = merge_object_prediction_pair(
object_prediction_list[keep_ind].tolist(),
object_prediction_list[merge_ind].tolist(),
object_prediction_list[keep_ind] = (
merge_object_prediction_pair(
object_prediction_list[keep_ind].tolist(),
object_prediction_list[merge_ind].tolist(),
)
)
selected_object_predictions.append(
object_prediction_list[keep_ind].tolist()
@@ -451,11 +451,11 @@ def __call__(
self.match_metric,
self.match_threshold,
):
object_prediction_list[
keep_ind
] = merge_object_prediction_pair(
object_prediction_list[keep_ind].tolist(),
object_prediction_list[merge_ind].tolist(),
object_prediction_list[keep_ind] = (
merge_object_prediction_pair(
object_prediction_list[keep_ind].tolist(),
object_prediction_list[merge_ind].tolist(),
)
)
selected_object_predictions.append(
object_prediction_list[keep_ind].tolist()
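Both reformatted hunks are the same greedy non-maximum-merging step: for each prediction kept, any candidate whose overlap passes `match_threshold` is folded in via `merge_object_prediction_pair`. A self-contained, boxes-only sketch of that greedy merge idea (a simplified stand-in, not SAHI's actual implementation):

```python
def iou(a, b):
    """IoU of two [x1, y1, x2, y2] boxes."""
    ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
    ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    union = area_a + area_b - inter
    return inter / union if union else 0.0


def merge_boxes(a, b):
    # Union box as a stand-in for merge_object_prediction_pair.
    return [min(a[0], b[0]), min(a[1], b[1]),
            max(a[2], b[2]), max(a[3], b[3])]


def greedy_merge(boxes, match_threshold=0.5):
    """Greedily fold overlapping boxes into the kept one (NMM-style)."""
    kept = []
    # Visit larger boxes first so they act as the "keep" candidates.
    for box in sorted(
        boxes, key=lambda r: (r[2] - r[0]) * (r[3] - r[1]), reverse=True
    ):
        for i, k in enumerate(kept):
            if iou(k, box) >= match_threshold:
                kept[i] = merge_boxes(k, box)
                break
        else:
            kept.append(box)
    return kept
```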
6 changes: 3 additions & 3 deletions anylabeling/services/auto_labeling/utils/sahi/predict.py
@@ -113,9 +113,9 @@ def get_prediction(
shift_amount=shift_amount,
full_shape=full_shape,
)
object_prediction_list: List[
ObjectPrediction
] = detection_model.object_prediction_list
object_prediction_list: List[ObjectPrediction] = (
detection_model.object_prediction_list
)

# postprocess matching predictions
if postprocess is not None: