From 2ca48bffa8f647c40f931728d8827c03ae7aefea Mon Sep 17 00:00:00 2001 From: shayaharon Date: Thu, 18 Apr 2024 15:21:15 +0300 Subject: [PATCH 01/22] wip --- .../sliding_window_inference_wrapper.py | 140 ++++++++++++++++++ .../training/processing/processing.py | 4 +- .../yolo_nas_integration_test.py | 21 +++ 3 files changed, 163 insertions(+), 2 deletions(-) create mode 100644 src/super_gradients/training/models/detection_models/sliding_window_inference_wrapper.py diff --git a/src/super_gradients/training/models/detection_models/sliding_window_inference_wrapper.py b/src/super_gradients/training/models/detection_models/sliding_window_inference_wrapper.py new file mode 100644 index 0000000000..30f55e7270 --- /dev/null +++ b/src/super_gradients/training/models/detection_models/sliding_window_inference_wrapper.py @@ -0,0 +1,140 @@ +import torch +import torch.nn as nn +import torchvision +from typing import List + +from super_gradients.training.datasets.datasets_conf import COCO_DETECTION_CLASSES_LIST +from super_gradients.training.models.detection_models.pp_yolo_e import PPYoloEPostPredictionCallback +from super_gradients.training.processing.processing import default_yolo_nas_coco_processing_params + + +class SlidingWindowInferenceWrapper(nn.Module): + def __init__(self, model, tile_size, tile_step, post_prediction_callback, nms_threshold=0.65, max_predictions_per_image=300, nms_top_k=1000): + super().__init__() + self.model = model + self.tile_size = tile_size + self.tile_step = tile_step + self.post_prediction_callback = post_prediction_callback + self.nms_threshold = nms_threshold + self.max_predictions_per_image = max_predictions_per_image + self.nms_top_k = nms_top_k + + def _filter_max_predictions(self, res: List) -> List: + res[:] = [im[: self.max_predictions_per_image] if (im is not None and im.shape[0] > self.max_predictions_per_image) else im for im in res] + + return res + + def forward(self, images): + batch_size, _, _, _ = images.shape + all_detections = 
[[] for _ in range(batch_size)] # Create a list for each image in the batch + + # Generate and process each tile + for img_idx in range(batch_size): + single_image = images[img_idx : img_idx + 1] # Extract each image + tiles = self._generate_tiles(single_image, self.tile_size, self.tile_step) + for tile, (start_x, start_y) in tiles: + tile_detections = self.model(tile) + # Apply local NMS using post_prediction_callback + tile_detections = self.post_prediction_callback(tile_detections) + # Adjust detections to global image coordinates + for img_i_tile_detections in tile_detections: + if len(img_i_tile_detections) > 0: + img_i_tile_detections[:, :4] += torch.tensor([start_x, start_y, start_x, start_y], device=tile.device) + all_detections[img_idx].append(img_i_tile_detections) + + # Concatenate and apply global NMS for each image's detections + final_detections = [] + for detections in all_detections: + if detections: + detections = torch.cat(detections, dim=0) + # Apply global NMS + pred_bboxes = detections[:, :4] + pred_cls_conf = detections[:, 4] + pred_cls_label = detections[:, 5] + + if pred_cls_conf.size(0) > self.nms_top_k: + topk_candidates = torch.topk(pred_cls_conf, k=self.nms_top_k, largest=True) + pred_cls_conf = pred_cls_conf[topk_candidates.indices] + pred_cls_label = pred_cls_label[topk_candidates.indices] + pred_bboxes = pred_bboxes[topk_candidates.indices, :] + + idx_to_keep = torchvision.ops.boxes.batched_nms(boxes=pred_bboxes, scores=pred_cls_conf, idxs=pred_cls_label, iou_threshold=self.nms_threshold) + + final_detections.append(detections[idx_to_keep]) + else: + final_detections.append(torch.empty(0, 6).to(images.device)) # Empty tensor for images with no detections + + if self.max_predictions_per_image is not None: + final_detections = self._filter_max_predictions(final_detections) + return final_detections + + def _generate_tiles(self, image, tile_size, tile_step): + _, _, h, w = image.shape + tiles = [] + for y in range(0, h - tile_size + 1, 
tile_step): + for x in range(0, w - tile_size + 1, tile_step): + tile = image[:, :, y : y + tile_size, x : x + tile_size] + tiles.append((tile, (x, y))) + return tiles + + +if __name__ == "__main__": + from super_gradients.training.models import get + from super_gradients.common.object_names import Models + import os + from super_gradients.training.dataloaders import coco2017_val_yolo_nas + from super_gradients.training.utils.detection_utils import DetectionVisualization + import cv2 + + data_dir = os.environ.get("SUPER_GRADIENTS_COCO_DATASET_DIR", "/data/coco") + + dl = coco2017_val_yolo_nas(dataset_params=dict(data_dir=data_dir), dataloader_params=dict(batch_size=4)) + x, y, _ = next(iter(dl)) + # x_repeat = torch.zeros((4,3,1280,1280)) + # x_repeat[:, :, 0:640, 0:640] = x + # x_repeat[:, :, 640:1280, 0:640] = x + # x_repeat[:, :, 640: 1280, 640:1280] = x + # x_repeat[:, :, 0: 640, 640:1280] = x\ + input_dim = [1280, 1280] + img = cv2.imread("/home/shay.aharon/cars-for-sale-parking-sale-4f07c1178051f8b82c8bbc640fb3c27d.jpg") + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + # r = min(input_dim[0] / img.shape[0], input_dim[1] / img.shape[1]) + # desired_size = (int(img.shape[1] * r), int(img.shape[0] * r)) + # img = cv2.resize(src=img, dsize=desired_size, interpolation=cv2.INTER_LINEAR).astype(np.uint8) + + pp = default_yolo_nas_coco_processing_params() + processor = pp["image_processor"] + + # Switch from HWC to CHW + # img_chw = np.transpose(img_rgb, (2, 0, 1)) + + # Convert to tensor + # img_tensor = torch.from_numpy(img_chw).float() / 255. 
+ + # Unsqueeze to add the batch dimension + img_tensor, _ = processor.preprocess_image(img) + img_tensor = torch.from_numpy(img_tensor) + img_tensor = img_tensor.unsqueeze(0) + model = get(Models.YOLO_NAS_S, pretrained_weights="coco") + ppcb = PPYoloEPostPredictionCallback(score_threshold=0.25, nms_top_k=1000, max_predictions=300, nms_threshold=0.7) + sm = SlidingWindowInferenceWrapper(model, 640, 64, post_prediction_callback=ppcb) + out_sliding_window = sm(img_tensor) + DetectionVisualization.visualize_batch( + image_tensor=img_tensor, + pred_boxes=out_sliding_window, + target_boxes=y, + batch_name="640_tile_64_step_on_cars_large_1280_bgr_no_cv2resize_fix", + class_names=COCO_DETECTION_CLASSES_LIST, + checkpoint_dir="/home/shay.aharon/sw_outputs", + ) + + # Example of how to set up and use the SlidingWindowInferenceWrapper: + # from super_gradients.training.models.detection_models.pp_yolo_e import PPYoloEPostPredictionCallback + # + # nms_callback = PPYoloEPostPredictionCallback(score_threshold=0.3, nms_threshold=0.5, nms_top_k=200, max_predictions=100) + # sw_inference = SlidingWindowInferenceWrapper(ppyoloe_model, tile_size=512, tile_step=256, + # post_prediction_callback=nms_callback) + # + # # Forward an image through the sliding window inference wrapper + # image = torch.rand(1, 3, 1024, 1024) # Example image tensor + # result = sw_inference(image) diff --git a/src/super_gradients/training/processing/processing.py b/src/super_gradients/training/processing/processing.py index 0fb13f481f..7c47a42468 100644 --- a/src/super_gradients/training/processing/processing.py +++ b/src/super_gradients/training/processing/processing.py @@ -964,8 +964,8 @@ def default_yolo_nas_coco_processing_params() -> dict: image_processor = ComposeProcessing( [ - DetectionLongestMaxSizeRescale(output_shape=(636, 636)), - DetectionCenterPadding(output_shape=(640, 640), pad_value=114), + DetectionLongestMaxSizeRescale(output_shape=(1280, 1280)), + 
DetectionCenterPadding(output_shape=(1280, 1280), pad_value=114), StandardizeImage(max_value=255.0), ImagePermute(permutation=(2, 0, 1)), ] diff --git a/tests/integration_tests/yolo_nas_integration_test.py b/tests/integration_tests/yolo_nas_integration_test.py index df8b9f3e9f..bbcf039816 100644 --- a/tests/integration_tests/yolo_nas_integration_test.py +++ b/tests/integration_tests/yolo_nas_integration_test.py @@ -5,6 +5,13 @@ from super_gradients.training import Trainer from super_gradients.training.metrics import DetectionMetrics from super_gradients.training.models.detection_models.pp_yolo_e import PPYoloEPostPredictionCallback +from super_gradients.training.models.detection_models.sliding_window_inference_wrapper import SlidingWindowInferenceWrapper +from super_gradients.training.utils.detection_utils import DetectionPostPredictionCallback + + +class IdentityPPCB(DetectionPostPredictionCallback): + def forward(self, x, device: str = None): + return x class YoloNASIntegrationTest(unittest.TestCase): @@ -23,6 +30,20 @@ def test_yolo_nas_s_coco(self): metric_values = trainer.test(model=model, test_loader=dl, test_metrics_list=[metric]) self.assertAlmostEqual(metric_values[metric.map_str], 0.475, delta=0.001) + def test_yolo_nas_s_coco_sw(self): + trainer = Trainer("test_yolo_nas_s") + model = models.get("yolo_nas_s", num_classes=80, pretrained_weights="coco") + dl = coco2017_val_yolo_nas(dataset_params=dict(data_dir=self.data_dir)) + metric = DetectionMetrics( + normalize_targets=True, + post_prediction_callback=IdentityPPCB(), + num_cls=80, + ) + ppcb = PPYoloEPostPredictionCallback(score_threshold=0.03, nms_top_k=1000, max_predictions=300, nms_threshold=0.65) + sm = SlidingWindowInferenceWrapper(model, 320, 320, post_prediction_callback=ppcb) + metric_values = trainer.test(model=sm, test_loader=dl, test_metrics_list=[metric]) + self.assertAlmostEqual(metric_values[metric.map_str], 0.475, delta=0.001) + def test_yolo_nas_m_coco(self): trainer = 
Trainer("test_yolo_nas_m") model = models.get("yolo_nas_m", num_classes=80, pretrained_weights="coco") From c3666e9b69deacf7e35d49b5e8bd8cbdd65afccb Mon Sep 17 00:00:00 2001 From: shayaharon Date: Tue, 30 Apr 2024 11:45:18 +0300 Subject: [PATCH 02/22] wip --- .../training/models/detection_models/customizable_detector.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/super_gradients/training/models/detection_models/customizable_detector.py b/src/super_gradients/training/models/detection_models/customizable_detector.py index 1dc372e2aa..f127e8b341 100644 --- a/src/super_gradients/training/models/detection_models/customizable_detector.py +++ b/src/super_gradients/training/models/detection_models/customizable_detector.py @@ -45,6 +45,7 @@ def __init__( bn_momentum: Optional[float] = None, inplace_act: Optional[bool] = True, in_channels: int = 3, + use_sliding_window_validation: bool = True, ): """ :param backbone: Backbone configuration. @@ -63,6 +64,7 @@ def __init__( self.bn_momentum = bn_momentum self.inplace_act = inplace_act self.in_channels = in_channels + self.use_sliding_window_validation = use_sliding_window_validation factory = det_factory.DetectionModulesFactory() # move num_classes into heads params From 4b783eae52751c52065b0354e6b4993acdcb243c Mon Sep 17 00:00:00 2001 From: shayaharon Date: Wed, 1 May 2024 14:18:31 +0300 Subject: [PATCH 03/22] wip2 --- .../detection_models/customizable_detector.py | 54 +++++++++++++++++++ .../sliding_window_inference_wrapper.py | 2 +- .../training/processing/processing.py | 2 +- 3 files changed, 56 insertions(+), 2 deletions(-) diff --git a/src/super_gradients/training/models/detection_models/customizable_detector.py b/src/super_gradients/training/models/detection_models/customizable_detector.py index f127e8b341..bf446adaa0 100644 --- a/src/super_gradients/training/models/detection_models/customizable_detector.py +++ b/src/super_gradients/training/models/detection_models/customizable_detector.py @@ -25,6 +25,7 @@ 
from super_gradients.training.processing.processing import Processing, ComposeProcessing, DetectionAutoPadding from super_gradients.training.utils.detection_utils import DetectionPostPredictionCallback from super_gradients.training.utils.media.image import ImageSource +import torchvision class CustomizableDetector(HasPredict, SgModule): @@ -91,7 +92,60 @@ def __init__( self._default_multi_label_per_box = True self._default_class_agnostic_nms = False + def forward_sliding_window(self, images): + batch_size, _, _, _ = images.shape + all_detections = [[] for _ in range(batch_size)] # Create a list for each image in the batch + + # Generate and process each tile + for img_idx in range(batch_size): + single_image = images[img_idx : img_idx + 1] # Extract each image + tiles = self._generate_tiles(single_image, self.tile_size, self.tile_step) + for tile, (start_x, start_y) in tiles: + tile_detections = self.model(tile) + # Apply local NMS using post_prediction_callback + tile_detections = self.post_prediction_callback(tile_detections) + # Adjust detections to global image coordinates + for img_i_tile_detections in tile_detections: + if len(img_i_tile_detections) > 0: + img_i_tile_detections[:, :4] += torch.tensor([start_x, start_y, start_x, start_y], device=tile.device) + all_detections[img_idx].append(img_i_tile_detections) + + # Concatenate and apply global NMS for each image's detections + final_detections = [] + for detections in all_detections: + if detections: + detections = torch.cat(detections, dim=0) + # Apply global NMS + pred_bboxes = detections[:, :4] + pred_cls_conf = detections[:, 4] + pred_cls_label = detections[:, 5] + idx_to_keep = torchvision.ops.boxes.batched_nms(boxes=pred_bboxes, scores=pred_cls_conf, idxs=pred_cls_label, iou_threshold=self.nms_threshold) + + final_detections.append(detections[idx_to_keep]) + else: + final_detections.append(torch.empty(0, 6).to(images.device)) # Empty tensor for images with no detections + + if 
self.max_predictions_per_image is not None: + final_detections = self._filter_max_predictions(final_detections) + return final_detections + + @staticmethod + def _generate_tiles(image, tile_size, tile_step): + _, _, h, w = image.shape + tiles = [] + for y in range(0, h - tile_size + 1, tile_step): + for x in range(0, w - tile_size + 1, tile_step): + tile = image[:, :, y : y + tile_size, x : x + tile_size] + tiles.append((tile, (x, y))) + return tiles + def forward(self, x): + if self.use_sliding_window_validation and not self.training: + return self.forward_sliding_window(x) + else: + return self.forward_whole_image(x) + + def forward_whole_image(self, x): x = self.backbone(x) x = self.neck(x) return self.heads(x) diff --git a/src/super_gradients/training/models/detection_models/sliding_window_inference_wrapper.py b/src/super_gradients/training/models/detection_models/sliding_window_inference_wrapper.py index 30f55e7270..9b19ef3701 100644 --- a/src/super_gradients/training/models/detection_models/sliding_window_inference_wrapper.py +++ b/src/super_gradients/training/models/detection_models/sliding_window_inference_wrapper.py @@ -24,7 +24,7 @@ def _filter_max_predictions(self, res: List) -> List: return res - def forward(self, images): + def forward_sliding_window(self, images): batch_size, _, _, _ = images.shape all_detections = [[] for _ in range(batch_size)] # Create a list for each image in the batch diff --git a/src/super_gradients/training/processing/processing.py b/src/super_gradients/training/processing/processing.py index 7c47a42468..07ad832eb5 100644 --- a/src/super_gradients/training/processing/processing.py +++ b/src/super_gradients/training/processing/processing.py @@ -965,7 +965,7 @@ def default_yolo_nas_coco_processing_params() -> dict: image_processor = ComposeProcessing( [ DetectionLongestMaxSizeRescale(output_shape=(1280, 1280)), - DetectionCenterPadding(output_shape=(1280, 1280), pad_value=114), + DetectionCenterPadding(output_shape=(640, 640), 
pad_value=114), StandardizeImage(max_value=255.0), ImagePermute(permutation=(2, 0, 1)), ] From 1d8cc8cf7df950a87b050e29aca11348b370feba Mon Sep 17 00:00:00 2001 From: shayaharon Date: Sun, 5 May 2024 10:48:52 +0300 Subject: [PATCH 04/22] working version, hard coded nms params --- .../examples/predict/detection_predict.py | 2 +- .../detection_models/customizable_detector.py | 62 ++++++-- .../sliding_window_inference_wrapper.py | 140 ------------------ .../training/processing/processing.py | 2 +- .../yolo_nas_integration_test.py | 21 --- 5 files changed, 48 insertions(+), 179 deletions(-) delete mode 100644 src/super_gradients/training/models/detection_models/sliding_window_inference_wrapper.py diff --git a/src/super_gradients/examples/predict/detection_predict.py b/src/super_gradients/examples/predict/detection_predict.py index 735871d5f4..60b4b2b633 100644 --- a/src/super_gradients/examples/predict/detection_predict.py +++ b/src/super_gradients/examples/predict/detection_predict.py @@ -3,7 +3,7 @@ from super_gradients.training import models # Note that currently only YoloX, PPYoloE and YOLO-NAS are supported. -model = models.get(Models.YOLO_NAS_L, pretrained_weights="coco") +model = models.get(Models.YOLO_NAS_S, pretrained_weights="coco") # We want to use cuda if available to speed up inference. 
model = model.to("cuda" if torch.cuda.is_available() else "cpu") diff --git a/src/super_gradients/training/models/detection_models/customizable_detector.py b/src/super_gradients/training/models/detection_models/customizable_detector.py index bf446adaa0..83e5a37dc1 100644 --- a/src/super_gradients/training/models/detection_models/customizable_detector.py +++ b/src/super_gradients/training/models/detection_models/customizable_detector.py @@ -28,6 +28,11 @@ import torchvision +class IdentityPostPredictionCallback(DetectionPostPredictionCallback): + def forward(self, x, device: str = None): + return x + + class CustomizableDetector(HasPredict, SgModule): """ A customizable detector with backbone -> neck -> heads @@ -47,6 +52,9 @@ def __init__( inplace_act: Optional[bool] = True, in_channels: int = 3, use_sliding_window_validation: bool = True, + tile_size=640, + tile_step=64, + min_tile_threshold=30, ): """ :param backbone: Backbone configuration. @@ -66,6 +74,12 @@ def __init__( self.inplace_act = inplace_act self.in_channels = in_channels self.use_sliding_window_validation = use_sliding_window_validation + self.sliding_window_post_prediction_callback = self.get_post_prediction_callback( + iou=0.7, conf=0.25, nms_top_k=1024, max_predictions=300, multi_label_per_box=True, class_agnostic_nms=False + ) + self.tile_size = tile_size + self.tile_step = tile_step + self.min_tile_threshold = min_tile_threshold factory = det_factory.DetectionModulesFactory() # move num_classes into heads params @@ -101,9 +115,9 @@ def forward_sliding_window(self, images): single_image = images[img_idx : img_idx + 1] # Extract each image tiles = self._generate_tiles(single_image, self.tile_size, self.tile_step) for tile, (start_x, start_y) in tiles: - tile_detections = self.model(tile) + tile_detections = self.forward_whole_image(tile) # Apply local NMS using post_prediction_callback - tile_detections = self.post_prediction_callback(tile_detections) + tile_detections = 
self.sliding_window_post_prediction_callback(tile_detections) # Adjust detections to global image coordinates for img_i_tile_detections in tile_detections: if len(img_i_tile_detections) > 0: @@ -119,24 +133,35 @@ def forward_sliding_window(self, images): pred_bboxes = detections[:, :4] pred_cls_conf = detections[:, 4] pred_cls_label = detections[:, 5] - idx_to_keep = torchvision.ops.boxes.batched_nms(boxes=pred_bboxes, scores=pred_cls_conf, idxs=pred_cls_label, iou_threshold=self.nms_threshold) + idx_to_keep = torchvision.ops.boxes.batched_nms( + boxes=pred_bboxes, scores=pred_cls_conf, idxs=pred_cls_label, iou_threshold=self._default_nms_iou + ) final_detections.append(detections[idx_to_keep]) else: final_detections.append(torch.empty(0, 6).to(images.device)) # Empty tensor for images with no detections - - if self.max_predictions_per_image is not None: - final_detections = self._filter_max_predictions(final_detections) return final_detections - @staticmethod - def _generate_tiles(image, tile_size, tile_step): + def _generate_tiles(self, image, tile_size, tile_step): _, _, h, w = image.shape tiles = [] - for y in range(0, h - tile_size + 1, tile_step): - for x in range(0, w - tile_size + 1, tile_step): - tile = image[:, :, y : y + tile_size, x : x + tile_size] + + # Calculate the end points for the grid + max_y = h if (h - tile_size) % tile_step < self.min_tile_threshold else h - (h - tile_size) % tile_step + tile_size + max_x = w if (w - tile_size) % tile_step < self.min_tile_threshold else w - (w - tile_size) % tile_step + tile_size + + # Ensure that the image has enough padding if needed + if max_y > h or max_x > w: + padded_image = torch.zeros((image.shape[0], image.shape[1], max(max_y, h), max(max_x, w)), device=image.device) + padded_image[:, :, :h, :w] = image # Place the original image in the padded one + else: + padded_image = image + + for y in range(0, max_y - tile_size + 1, tile_step): + for x in range(0, max_x - tile_size + 1, tile_step): + tile = 
padded_image[:, :, y : y + tile_size, x : x + tile_size] tiles.append((tile, (x, y))) + return tiles def forward(self, x): @@ -302,17 +327,22 @@ def _get_pipeline( else: image_processor = self._image_processor - pipeline = DetectionPipeline( - model=self, - image_processor=image_processor, - post_prediction_callback=self.get_post_prediction_callback( + if self.use_sliding_window_validation: + post_prediction_callback = IdentityPostPredictionCallback() + else: + post_prediction_callback = self.get_post_prediction_callback( iou=iou, conf=conf, nms_top_k=nms_top_k, max_predictions=max_predictions, multi_label_per_box=multi_label_per_box, class_agnostic_nms=class_agnostic_nms, - ), + ) + + pipeline = DetectionPipeline( + model=self, + image_processor=image_processor, + post_prediction_callback=post_prediction_callback, class_names=self._class_names, fuse_model=fuse_model, fp16=fp16, diff --git a/src/super_gradients/training/models/detection_models/sliding_window_inference_wrapper.py b/src/super_gradients/training/models/detection_models/sliding_window_inference_wrapper.py deleted file mode 100644 index 9b19ef3701..0000000000 --- a/src/super_gradients/training/models/detection_models/sliding_window_inference_wrapper.py +++ /dev/null @@ -1,140 +0,0 @@ -import torch -import torch.nn as nn -import torchvision -from typing import List - -from super_gradients.training.datasets.datasets_conf import COCO_DETECTION_CLASSES_LIST -from super_gradients.training.models.detection_models.pp_yolo_e import PPYoloEPostPredictionCallback -from super_gradients.training.processing.processing import default_yolo_nas_coco_processing_params - - -class SlidingWindowInferenceWrapper(nn.Module): - def __init__(self, model, tile_size, tile_step, post_prediction_callback, nms_threshold=0.65, max_predictions_per_image=300, nms_top_k=1000): - super().__init__() - self.model = model - self.tile_size = tile_size - self.tile_step = tile_step - self.post_prediction_callback = post_prediction_callback - 
self.nms_threshold = nms_threshold - self.max_predictions_per_image = max_predictions_per_image - self.nms_top_k = nms_top_k - - def _filter_max_predictions(self, res: List) -> List: - res[:] = [im[: self.max_predictions_per_image] if (im is not None and im.shape[0] > self.max_predictions_per_image) else im for im in res] - - return res - - def forward_sliding_window(self, images): - batch_size, _, _, _ = images.shape - all_detections = [[] for _ in range(batch_size)] # Create a list for each image in the batch - - # Generate and process each tile - for img_idx in range(batch_size): - single_image = images[img_idx : img_idx + 1] # Extract each image - tiles = self._generate_tiles(single_image, self.tile_size, self.tile_step) - for tile, (start_x, start_y) in tiles: - tile_detections = self.model(tile) - # Apply local NMS using post_prediction_callback - tile_detections = self.post_prediction_callback(tile_detections) - # Adjust detections to global image coordinates - for img_i_tile_detections in tile_detections: - if len(img_i_tile_detections) > 0: - img_i_tile_detections[:, :4] += torch.tensor([start_x, start_y, start_x, start_y], device=tile.device) - all_detections[img_idx].append(img_i_tile_detections) - - # Concatenate and apply global NMS for each image's detections - final_detections = [] - for detections in all_detections: - if detections: - detections = torch.cat(detections, dim=0) - # Apply global NMS - pred_bboxes = detections[:, :4] - pred_cls_conf = detections[:, 4] - pred_cls_label = detections[:, 5] - - if pred_cls_conf.size(0) > self.nms_top_k: - topk_candidates = torch.topk(pred_cls_conf, k=self.nms_top_k, largest=True) - pred_cls_conf = pred_cls_conf[topk_candidates.indices] - pred_cls_label = pred_cls_label[topk_candidates.indices] - pred_bboxes = pred_bboxes[topk_candidates.indices, :] - - idx_to_keep = torchvision.ops.boxes.batched_nms(boxes=pred_bboxes, scores=pred_cls_conf, idxs=pred_cls_label, iou_threshold=self.nms_threshold) - - 
final_detections.append(detections[idx_to_keep]) - else: - final_detections.append(torch.empty(0, 6).to(images.device)) # Empty tensor for images with no detections - - if self.max_predictions_per_image is not None: - final_detections = self._filter_max_predictions(final_detections) - return final_detections - - def _generate_tiles(self, image, tile_size, tile_step): - _, _, h, w = image.shape - tiles = [] - for y in range(0, h - tile_size + 1, tile_step): - for x in range(0, w - tile_size + 1, tile_step): - tile = image[:, :, y : y + tile_size, x : x + tile_size] - tiles.append((tile, (x, y))) - return tiles - - -if __name__ == "__main__": - from super_gradients.training.models import get - from super_gradients.common.object_names import Models - import os - from super_gradients.training.dataloaders import coco2017_val_yolo_nas - from super_gradients.training.utils.detection_utils import DetectionVisualization - import cv2 - - data_dir = os.environ.get("SUPER_GRADIENTS_COCO_DATASET_DIR", "/data/coco") - - dl = coco2017_val_yolo_nas(dataset_params=dict(data_dir=data_dir), dataloader_params=dict(batch_size=4)) - x, y, _ = next(iter(dl)) - # x_repeat = torch.zeros((4,3,1280,1280)) - # x_repeat[:, :, 0:640, 0:640] = x - # x_repeat[:, :, 640:1280, 0:640] = x - # x_repeat[:, :, 640: 1280, 640:1280] = x - # x_repeat[:, :, 0: 640, 640:1280] = x\ - input_dim = [1280, 1280] - img = cv2.imread("/home/shay.aharon/cars-for-sale-parking-sale-4f07c1178051f8b82c8bbc640fb3c27d.jpg") - img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) - # r = min(input_dim[0] / img.shape[0], input_dim[1] / img.shape[1]) - # desired_size = (int(img.shape[1] * r), int(img.shape[0] * r)) - # img = cv2.resize(src=img, dsize=desired_size, interpolation=cv2.INTER_LINEAR).astype(np.uint8) - - pp = default_yolo_nas_coco_processing_params() - processor = pp["image_processor"] - - # Switch from HWC to CHW - # img_chw = np.transpose(img_rgb, (2, 0, 1)) - - # Convert to tensor - # img_tensor = 
torch.from_numpy(img_chw).float() / 255. - - # Unsqueeze to add the batch dimension - img_tensor, _ = processor.preprocess_image(img) - img_tensor = torch.from_numpy(img_tensor) - img_tensor = img_tensor.unsqueeze(0) - model = get(Models.YOLO_NAS_S, pretrained_weights="coco") - ppcb = PPYoloEPostPredictionCallback(score_threshold=0.25, nms_top_k=1000, max_predictions=300, nms_threshold=0.7) - sm = SlidingWindowInferenceWrapper(model, 640, 64, post_prediction_callback=ppcb) - out_sliding_window = sm(img_tensor) - DetectionVisualization.visualize_batch( - image_tensor=img_tensor, - pred_boxes=out_sliding_window, - target_boxes=y, - batch_name="640_tile_64_step_on_cars_large_1280_bgr_no_cv2resize_fix", - class_names=COCO_DETECTION_CLASSES_LIST, - checkpoint_dir="/home/shay.aharon/sw_outputs", - ) - - # Example of how to set up and use the SlidingWindowInferenceWrapper: - # from super_gradients.training.models.detection_models.pp_yolo_e import PPYoloEPostPredictionCallback - # - # nms_callback = PPYoloEPostPredictionCallback(score_threshold=0.3, nms_threshold=0.5, nms_top_k=200, max_predictions=100) - # sw_inference = SlidingWindowInferenceWrapper(ppyoloe_model, tile_size=512, tile_step=256, - # post_prediction_callback=nms_callback) - # - # # Forward an image through the sliding window inference wrapper - # image = torch.rand(1, 3, 1024, 1024) # Example image tensor - # result = sw_inference(image) diff --git a/src/super_gradients/training/processing/processing.py b/src/super_gradients/training/processing/processing.py index 07ad832eb5..7c47a42468 100644 --- a/src/super_gradients/training/processing/processing.py +++ b/src/super_gradients/training/processing/processing.py @@ -965,7 +965,7 @@ def default_yolo_nas_coco_processing_params() -> dict: image_processor = ComposeProcessing( [ DetectionLongestMaxSizeRescale(output_shape=(1280, 1280)), - DetectionCenterPadding(output_shape=(640, 640), pad_value=114), + DetectionCenterPadding(output_shape=(1280, 1280), 
pad_value=114), StandardizeImage(max_value=255.0), ImagePermute(permutation=(2, 0, 1)), ] diff --git a/tests/integration_tests/yolo_nas_integration_test.py b/tests/integration_tests/yolo_nas_integration_test.py index bbcf039816..df8b9f3e9f 100644 --- a/tests/integration_tests/yolo_nas_integration_test.py +++ b/tests/integration_tests/yolo_nas_integration_test.py @@ -5,13 +5,6 @@ from super_gradients.training import Trainer from super_gradients.training.metrics import DetectionMetrics from super_gradients.training.models.detection_models.pp_yolo_e import PPYoloEPostPredictionCallback -from super_gradients.training.models.detection_models.sliding_window_inference_wrapper import SlidingWindowInferenceWrapper -from super_gradients.training.utils.detection_utils import DetectionPostPredictionCallback - - -class IdentityPPCB(DetectionPostPredictionCallback): - def forward(self, x, device: str = None): - return x class YoloNASIntegrationTest(unittest.TestCase): @@ -30,20 +23,6 @@ def test_yolo_nas_s_coco(self): metric_values = trainer.test(model=model, test_loader=dl, test_metrics_list=[metric]) self.assertAlmostEqual(metric_values[metric.map_str], 0.475, delta=0.001) - def test_yolo_nas_s_coco_sw(self): - trainer = Trainer("test_yolo_nas_s") - model = models.get("yolo_nas_s", num_classes=80, pretrained_weights="coco") - dl = coco2017_val_yolo_nas(dataset_params=dict(data_dir=self.data_dir)) - metric = DetectionMetrics( - normalize_targets=True, - post_prediction_callback=IdentityPPCB(), - num_cls=80, - ) - ppcb = PPYoloEPostPredictionCallback(score_threshold=0.03, nms_top_k=1000, max_predictions=300, nms_threshold=0.65) - sm = SlidingWindowInferenceWrapper(model, 320, 320, post_prediction_callback=ppcb) - metric_values = trainer.test(model=sm, test_loader=dl, test_metrics_list=[metric]) - self.assertAlmostEqual(metric_values[metric.map_str], 0.475, delta=0.001) - def test_yolo_nas_m_coco(self): trainer = Trainer("test_yolo_nas_m") model = models.get("yolo_nas_m", 
num_classes=80, pretrained_weights="coco") From fae6d8dae30b4dfd8b033d1ede56fc3ff28fe940 Mon Sep 17 00:00:00 2001 From: shayaharon Date: Sun, 5 May 2024 11:06:38 +0300 Subject: [PATCH 05/22] moved post prediction callback to utils --- .../detection_models/customizable_detector.py | 13 +++++++------ .../training/utils/detection_utils.py | 9 +++++++++ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/super_gradients/training/models/detection_models/customizable_detector.py b/src/super_gradients/training/models/detection_models/customizable_detector.py index 83e5a37dc1..b80f0bd415 100644 --- a/src/super_gradients/training/models/detection_models/customizable_detector.py +++ b/src/super_gradients/training/models/detection_models/customizable_detector.py @@ -23,16 +23,11 @@ from super_gradients.training.utils.predict import ImagesDetectionPrediction from super_gradients.training.pipelines.pipelines import DetectionPipeline from super_gradients.training.processing.processing import Processing, ComposeProcessing, DetectionAutoPadding -from super_gradients.training.utils.detection_utils import DetectionPostPredictionCallback +from super_gradients.training.utils.detection_utils import DetectionPostPredictionCallback, IdentityPostPredictionCallback from super_gradients.training.utils.media.image import ImageSource import torchvision -class IdentityPostPredictionCallback(DetectionPostPredictionCallback): - def forward(self, x, device: str = None): - return x - - class CustomizableDetector(HasPredict, SgModule): """ A customizable detector with backbone -> neck -> heads @@ -55,6 +50,12 @@ def __init__( tile_size=640, tile_step=64, min_tile_threshold=30, + tile_nms_iou: float = 0.7, + tile_nms_conf: float = 0.5, + tile_nms_top_k: int = 1024, + tile_nms_max_predictions=300, + tile_nms_multi_label_per_box=True, + tile_nms_class_agnostic_nms=False, ): """ :param backbone: Backbone configuration. 
diff --git a/src/super_gradients/training/utils/detection_utils.py b/src/super_gradients/training/utils/detection_utils.py index 0d528d837f..844acd2829 100755 --- a/src/super_gradients/training/utils/detection_utils.py +++ b/src/super_gradients/training/utils/detection_utils.py @@ -228,6 +228,15 @@ def forward(self, x, device: str = None): raise NotImplementedError +class IdentityPostPredictionCallback(DetectionPostPredictionCallback): + """ + Detection Post Prediction callback that simply returns the input + """ + + def forward(self, x, device: str = None): + return x + + class IouThreshold(tuple, Enum): MAP_05 = (0.5, 0.5) MAP_05_TO_095 = (0.5, 0.95) From 45aea2aa52552c1c69a695a6a875c68cc11bd987 Mon Sep 17 00:00:00 2001 From: shayaharon Date: Tue, 7 May 2024 09:55:01 +0300 Subject: [PATCH 06/22] moved back to wrapper --- .../sliding_window_detector.py | 344 ++++++++++++++++++ 1 file changed, 344 insertions(+) create mode 100644 src/super_gradients/training/models/detection_models/sliding_window_detector.py diff --git a/src/super_gradients/training/models/detection_models/sliding_window_detector.py b/src/super_gradients/training/models/detection_models/sliding_window_detector.py new file mode 100644 index 0000000000..bfcf817f5e --- /dev/null +++ b/src/super_gradients/training/models/detection_models/sliding_window_detector.py @@ -0,0 +1,344 @@ +from typing import Optional, List +from functools import lru_cache + +import torch + +from super_gradients.common.decorators.factory_decorator import resolve_param +from super_gradients.common.factories.processing_factory import ProcessingFactory +from super_gradients.module_interfaces import HasPredict +from super_gradients.training.models.sg_module import SgModule +from super_gradients.training.utils.predict import ImagesDetectionPrediction +from super_gradients.training.pipelines.pipelines import DetectionPipeline +from super_gradients.training.processing.processing import Processing, ComposeProcessing, 
DetectionAutoPadding +from super_gradients.training.utils.detection_utils import IdentityPostPredictionCallback +from super_gradients.training.utils.media.image import ImageSource +import torchvision +from super_gradients.training.utils.detection_utils import DetectionPostPredictionCallback + + +class SlidingWindowInferenceDetectionWrapper(HasPredict, SgModule): + """ + A customizable detector with backbone -> neck -> heads + Each submodule with its parameters must be defined explicitly. + Modules should follow the interface of BaseDetectionModule + """ + + def __init__( + self, + model: SgModule, + tile_size=640, + tile_step=64, + min_tile_threshold=30, + tile_nms_iou: float = 0.7, + tile_nms_conf: float = 0.5, + tile_nms_top_k: int = 1024, + tile_nms_max_predictions=300, + tile_nms_multi_label_per_box=True, + tile_nms_class_agnostic_nms=False, + ): + """ + + :param tile_size: + :param tile_step: + :param min_tile_threshold: + :param tile_nms_iou: + :param tile_nms_conf: + :param tile_nms_top_k: + :param tile_nms_max_predictions: + :param tile_nms_multi_label_per_box: + :param tile_nms_class_agnostic_nms: + """ + super().__init__() + self.model = model + + self.sliding_window_post_prediction_callback = self.get_post_prediction_callback( + iou=tile_nms_iou, + conf=tile_nms_conf, + nms_top_k=tile_nms_top_k, + max_predictions=tile_nms_max_predictions, + multi_label_per_box=tile_nms_multi_label_per_box, + class_agnostic_nms=tile_nms_class_agnostic_nms, + ) + self.tile_size = tile_size + self.tile_step = tile_step + self.min_tile_threshold = min_tile_threshold + self._class_names: Optional[List[str]] = None + self._image_processor: Optional[Processing] = None + + def forward(self, images): + batch_size, _, _, _ = images.shape + all_detections = [[] for _ in range(batch_size)] # Create a list for each image in the batch + + # Generate and process each tile + for img_idx in range(batch_size): + single_image = images[img_idx : img_idx + 1] # Extract each image + tiles = 
self._generate_tiles(single_image, self.tile_size, self.tile_step) + for tile, (start_x, start_y) in tiles: + tile_detections = self.forward_whole_image(tile) + # Apply local NMS using post_prediction_callback + tile_detections = self.sliding_window_post_prediction_callback(tile_detections) + # Adjust detections to global image coordinates + for img_i_tile_detections in tile_detections: + if len(img_i_tile_detections) > 0: + img_i_tile_detections[:, :4] += torch.tensor([start_x, start_y, start_x, start_y], device=tile.device) + all_detections[img_idx].append(img_i_tile_detections) + + # Concatenate and apply global NMS for each image's detections + final_detections = [] + for detections in all_detections: + if detections: + detections = torch.cat(detections, dim=0) + # Apply global NMS + pred_bboxes = detections[:, :4] + pred_cls_conf = detections[:, 4] + pred_cls_label = detections[:, 5] + idx_to_keep = torchvision.ops.boxes.batched_nms( + boxes=pred_bboxes, scores=pred_cls_conf, idxs=pred_cls_label, iou_threshold=self.sliding_window_post_prediction_callback.iou + ) + + final_detections.append(detections[idx_to_keep]) + else: + final_detections.append(torch.empty(0, 6).to(images.device)) # Empty tensor for images with no detections + return final_detections + + def _generate_tiles(self, image, tile_size, tile_step): + _, _, h, w = image.shape + tiles = [] + + # Calculate the end points for the grid + max_y = h if (h - tile_size) % tile_step < self.min_tile_threshold else h - (h - tile_size) % tile_step + tile_size + max_x = w if (w - tile_size) % tile_step < self.min_tile_threshold else w - (w - tile_size) % tile_step + tile_size + + # Ensure that the image has enough padding if needed + if max_y > h or max_x > w: + padded_image = torch.zeros((image.shape[0], image.shape[1], max(max_y, h), max(max_x, w)), device=image.device) + padded_image[:, :, :h, :w] = image # Place the original image in the padded one + else: + padded_image = image + + for y in range(0, max_y 
- tile_size + 1, tile_step): + for x in range(0, max_x - tile_size + 1, tile_step): + tile = padded_image[:, :, y : y + tile_size, x : x + tile_size] + tiles.append((tile, (x, y))) + + return tiles + + def get_post_prediction_callback( + self, *, conf: float, iou: float, nms_top_k: int, max_predictions: int, multi_label_per_box: bool, class_agnostic_nms: bool + ) -> DetectionPostPredictionCallback: + """ + Get a post prediction callback for this model. + + :param conf: A minimum confidence threshold for predictions to be used in post-processing. + :param iou: A IoU threshold for boxes non-maximum suppression. + :param nms_top_k: The maximum number of detections to consider for NMS. + :param max_predictions: The maximum number of detections to return. + :param multi_label_per_box: If True, each anchor can produce multiple labels of different classes. + If False, each anchor can produce only one label of the class with the highest score. + :param class_agnostic_nms: If True, perform class-agnostic NMS (i.e IoU of boxes of different classes is checked). + If False NMS is performed separately for each class. + :return: + """ + return self.model.get_post_prediction_callback( + conf=conf, + iou=iou, + nms_top_k=nms_top_k, + max_predictions=max_predictions, + multi_label_per_box=multi_label_per_box, + class_agnostic_nms=class_agnostic_nms, + ) + + @resolve_param("image_processor", ProcessingFactory()) + def set_dataset_processing_params( + self, + class_names: Optional[List[str]] = None, + image_processor: Optional[Processing] = None, + iou: Optional[float] = None, + conf: Optional[float] = None, + nms_top_k: Optional[int] = None, + max_predictions: Optional[int] = None, + multi_label_per_box: Optional[bool] = None, + class_agnostic_nms: Optional[bool] = None, + ) -> None: + """Set the processing parameters for the dataset. + + :param class_names: (Optional) Names of the dataset the model was trained on. 
+ :param image_processor: (Optional) Image processing objects to reproduce the dataset preprocessing used for training. + :param iou: (Optional) IoU threshold for the nms algorithm + :param conf: (Optional) Below the confidence threshold, prediction are discarded + :param nms_top_k: (Optional) The maximum number of detections to consider for NMS. + :param max_predictions: (Optional) The maximum number of detections to return. + :param multi_label_per_box: (Optional) If True, each anchor can produce multiple labels of different classes. + If False, each anchor can produce only one label of the class with the highest score. + :param class_agnostic_nms: (Optional) If True, perform class-agnostic NMS (i.e IoU of boxes of different classes is checked). + If False NMS is performed separately for each class. + """ + if class_names is not None: + self._class_names = tuple(class_names) + if image_processor is not None: + self._image_processor = image_processor + if iou is not None: + self.sliding_window_post_prediction_callback.iou = float(iou) + if conf is not None: + self.sliding_window_post_prediction_callback.conf = float(conf) + if nms_top_k is not None: + self.sliding_window_post_prediction_callback.nms_top_k = int(nms_top_k) + if max_predictions is not None: + self.sliding_window_post_prediction_callback.max_predictions = int(max_predictions) + if multi_label_per_box is not None: + self.sliding_window_post_prediction_callback.multi_label_per_box = bool(multi_label_per_box) + if class_agnostic_nms is not None: + self.sliding_window_post_prediction_callback.class_agnostic_nms = bool(class_agnostic_nms) + + def get_processing_params(self) -> Optional[Processing]: + return self._image_processor + + @lru_cache(maxsize=1) + def _get_pipeline( + self, + *, + iou: Optional[float] = None, + conf: Optional[float] = None, + fuse_model: bool = True, + skip_image_resizing: bool = False, + nms_top_k: Optional[int] = None, + max_predictions: Optional[int] = None, + 
multi_label_per_box: Optional[bool] = None, + class_agnostic_nms: Optional[bool] = None, + fp16: bool = True, + ) -> DetectionPipeline: + """Instantiate the prediction pipeline of this model. + + :param iou: (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used. + :param conf: (Optional) Below the confidence threshold, prediction are discarded. + If None, the default value associated to the training is used. + :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. + :param skip_image_resizing: If True, the image processor will not resize the images. + :param nms_top_k: (Optional) The maximum number of detections to consider for NMS. + :param max_predictions: (Optional) The maximum number of detections to return. + :param multi_label_per_box: (Optional) If True, each anchor can produce multiple labels of different classes. + If False, each anchor can produce only one label of the class with the highest score. + :param class_agnostic_nms: (Optional) If True, perform class-agnostic NMS (i.e IoU of boxes of different classes is checked). + If False NMS is performed separately for each class. + :param fp16: If True, use mixed precision for inference. + """ + if None in (self._class_names, self._image_processor, self._default_nms_iou, self._default_nms_conf): + raise RuntimeError( + "You must set the dataset processing parameters before calling predict.\n" "Please call `model.set_dataset_processing_params(...)` first." 
+ ) + + self.sliding_window_post_prediction_callback.iou = self.sliding_window_post_prediction_callback.iou if iou is None else iou + self.sliding_window_post_prediction_callback.nms_conf = self.sliding_window_post_prediction_callback.nms_conf if conf is None else conf + self.sliding_window_post_prediction_callback.nms_top_k = self.sliding_window_post_prediction_callback.nms_top_k if nms_top_k is None else nms_top_k + self.sliding_window_post_prediction_callback.max_predictions = ( + self.sliding_window_post_prediction_callback.max_predictions if max_predictions is None else max_predictions + ) + self.sliding_window_post_prediction_callback.multi_label_per_box = ( + self.sliding_window_post_prediction_callback.multi_label_per_box if multi_label_per_box is None else multi_label_per_box + ) + self.sliding_window_post_prediction_callback.class_agnostic_nms = ( + self.sliding_window_post_prediction_callback.class_agnostic_nms if class_agnostic_nms is None else class_agnostic_nms + ) + + # Ensure that the image size is divisible by 32. 
+ if isinstance(self._image_processor, ComposeProcessing) and skip_image_resizing: + image_processor = self._image_processor.get_equivalent_compose_without_resizing( + auto_padding=DetectionAutoPadding(shape_multiple=(32, 32), pad_value=0) + ) + else: + image_processor = self._image_processor + + pipeline = DetectionPipeline( + model=self, + image_processor=image_processor, + post_prediction_callback=IdentityPostPredictionCallback(), + class_names=self._class_names, + fuse_model=fuse_model, + fp16=fp16, + ) + return pipeline + + def predict( + self, + images: ImageSource, + iou: Optional[float] = None, + conf: Optional[float] = None, + batch_size: int = 32, + fuse_model: bool = True, + skip_image_resizing: bool = False, + nms_top_k: Optional[int] = None, + max_predictions: Optional[int] = None, + multi_label_per_box: Optional[bool] = None, + class_agnostic_nms: Optional[bool] = None, + fp16: bool = True, + ) -> ImagesDetectionPrediction: + """Predict an image or a list of images. + + :param images: Images to predict. + :param iou: (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used. + :param conf: (Optional) Below the confidence threshold, prediction are discarded. + If None, the default value associated to the training is used. + :param batch_size: Maximum number of images to process at the same time. + :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. + :param skip_image_resizing: If True, the image processor will not resize the images. + :param nms_top_k: (Optional) The maximum number of detections to consider for NMS. + :param max_predictions: (Optional) The maximum number of detections to return. + :param multi_label_per_box: (Optional) If True, each anchor can produce multiple labels of different classes. + If False, each anchor can produce only one label of the class with the highest score. 
+ :param class_agnostic_nms: (Optional) If True, perform class-agnostic NMS (i.e IoU of boxes of different classes is checked). + If False NMS is performed separately for each class. + :param fp16: If True, use mixed precision for inference. + """ + pipeline = self._get_pipeline( + iou=iou, + conf=conf, + fuse_model=fuse_model, + skip_image_resizing=skip_image_resizing, + nms_top_k=nms_top_k, + max_predictions=max_predictions, + multi_label_per_box=multi_label_per_box, + class_agnostic_nms=class_agnostic_nms, + fp16=fp16, + ) + return pipeline(images, batch_size=batch_size) # type: ignore + + def predict_webcam( + self, + iou: Optional[float] = None, + conf: Optional[float] = None, + fuse_model: bool = True, + skip_image_resizing: bool = False, + nms_top_k: Optional[int] = None, + max_predictions: Optional[int] = None, + multi_label_per_box: Optional[bool] = None, + class_agnostic_nms: Optional[bool] = None, + fp16: bool = True, + ): + """Predict using webcam. + + :param iou: (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used. + :param conf: (Optional) Below the confidence threshold, prediction are discarded. + If None, the default value associated to the training is used. + :param batch_size: Maximum number of images to process at the same time. + :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. + :param skip_image_resizing: If True, the image processor will not resize the images. + :param nms_top_k: (Optional) The maximum number of detections to consider for NMS. + :param max_predictions: (Optional) The maximum number of detections to return. + :param multi_label_per_box: (Optional) If True, each anchor can produce multiple labels of different classes. + If False, each anchor can produce only one label of the class with the highest score. 
+ :param class_agnostic_nms: (Optional) If True, perform class-agnostic NMS (i.e IoU of boxes of different classes is checked). + If False NMS is performed separately for each class. + :param fp16: If True, use mixed precision for inference. + """ + pipeline = self._get_pipeline( + iou=iou, + conf=conf, + fuse_model=fuse_model, + skip_image_resizing=skip_image_resizing, + nms_top_k=nms_top_k, + max_predictions=max_predictions, + multi_label_per_box=multi_label_per_box, + class_agnostic_nms=class_agnostic_nms, + fp16=fp16, + ) + pipeline.predict_webcam() From 837ffd3b407275ef59ba948c62d4b847f5b12cc0 Mon Sep 17 00:00:00 2001 From: shayaharon Date: Thu, 9 May 2024 11:15:52 +0300 Subject: [PATCH 07/22] added abstract class, small refactoring for pipeline --- .../training/forward_wrappers/__init__.py | 0 .../abstract_forward_wrapper_model.py | 14 ++++ .../detection_models/customizable_detector.py | 17 ++--- .../sliding_window_detector.py | 72 ++++++++++++------- .../training/pipelines/pipelines.py | 24 ++++--- 5 files changed, 79 insertions(+), 48 deletions(-) create mode 100644 src/super_gradients/training/forward_wrappers/__init__.py create mode 100644 src/super_gradients/training/forward_wrappers/abstract_forward_wrapper_model.py diff --git a/src/super_gradients/training/forward_wrappers/__init__.py b/src/super_gradients/training/forward_wrappers/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/super_gradients/training/forward_wrappers/abstract_forward_wrapper_model.py b/src/super_gradients/training/forward_wrappers/abstract_forward_wrapper_model.py new file mode 100644 index 0000000000..68531b2d6d --- /dev/null +++ b/src/super_gradients/training/forward_wrappers/abstract_forward_wrapper_model.py @@ -0,0 +1,14 @@ +import abc + +import torch +from torch import nn +from abc import abstractmethod + + +class AbstractForwardWrapperModel(abc.ABC): + def __init__(self, model: nn.Module): + self.model = model + + @abstractmethod + def 
__call__(self, inputs: torch.Tensor): + raise NotImplementedError diff --git a/src/super_gradients/training/models/detection_models/customizable_detector.py b/src/super_gradients/training/models/detection_models/customizable_detector.py index b80f0bd415..bf9c79b98a 100644 --- a/src/super_gradients/training/models/detection_models/customizable_detector.py +++ b/src/super_gradients/training/models/detection_models/customizable_detector.py @@ -23,7 +23,7 @@ from super_gradients.training.utils.predict import ImagesDetectionPrediction from super_gradients.training.pipelines.pipelines import DetectionPipeline from super_gradients.training.processing.processing import Processing, ComposeProcessing, DetectionAutoPadding -from super_gradients.training.utils.detection_utils import DetectionPostPredictionCallback, IdentityPostPredictionCallback +from super_gradients.training.utils.detection_utils import DetectionPostPredictionCallback from super_gradients.training.utils.media.image import ImageSource import torchvision @@ -328,22 +328,17 @@ def _get_pipeline( else: image_processor = self._image_processor - if self.use_sliding_window_validation: - post_prediction_callback = IdentityPostPredictionCallback() - else: - post_prediction_callback = self.get_post_prediction_callback( + pipeline = DetectionPipeline( + model=self, + image_processor=image_processor, + post_prediction_callback=self.get_post_prediction_callback( iou=iou, conf=conf, nms_top_k=nms_top_k, max_predictions=max_predictions, multi_label_per_box=multi_label_per_box, class_agnostic_nms=class_agnostic_nms, - ) - - pipeline = DetectionPipeline( - model=self, - image_processor=image_processor, - post_prediction_callback=post_prediction_callback, + ), class_names=self._class_names, fuse_model=fuse_model, fp16=fp16, diff --git a/src/super_gradients/training/models/detection_models/sliding_window_detector.py b/src/super_gradients/training/models/detection_models/sliding_window_detector.py index bfcf817f5e..3e16b5554c 
100644 --- a/src/super_gradients/training/models/detection_models/sliding_window_detector.py +++ b/src/super_gradients/training/models/detection_models/sliding_window_detector.py @@ -10,7 +10,6 @@ from super_gradients.training.utils.predict import ImagesDetectionPrediction from super_gradients.training.pipelines.pipelines import DetectionPipeline from super_gradients.training.processing.processing import Processing, ComposeProcessing, DetectionAutoPadding -from super_gradients.training.utils.detection_utils import IdentityPostPredictionCallback from super_gradients.training.utils.media.image import ImageSource import torchvision from super_gradients.training.utils.detection_utils import DetectionPostPredictionCallback @@ -62,8 +61,16 @@ def __init__( self.tile_size = tile_size self.tile_step = tile_step self.min_tile_threshold = min_tile_threshold + + # Processing params self._class_names: Optional[List[str]] = None self._image_processor: Optional[Processing] = None + self._default_nms_iou: float = tile_nms_iou + self._default_nms_conf: float = tile_nms_conf + self._default_nms_top_k: int = tile_nms_top_k + self._default_max_predictions = tile_nms_max_predictions + self._default_multi_label_per_box = tile_nms_multi_label_per_box + self._default_class_agnostic_nms = tile_nms_class_agnostic_nms def forward(self, images): batch_size, _, _, _ = images.shape @@ -74,7 +81,7 @@ def forward(self, images): single_image = images[img_idx : img_idx + 1] # Extract each image tiles = self._generate_tiles(single_image, self.tile_size, self.tile_step) for tile, (start_x, start_y) in tiles: - tile_detections = self.forward_whole_image(tile) + tile_detections = self.model(tile) # Apply local NMS using post_prediction_callback tile_detections = self.sliding_window_post_prediction_callback(tile_detections) # Adjust detections to global image coordinates @@ -177,18 +184,28 @@ def set_dataset_processing_params( self._class_names = tuple(class_names) if image_processor is not None: 
self._image_processor = image_processor - if iou is not None: - self.sliding_window_post_prediction_callback.iou = float(iou) - if conf is not None: - self.sliding_window_post_prediction_callback.conf = float(conf) - if nms_top_k is not None: - self.sliding_window_post_prediction_callback.nms_top_k = int(nms_top_k) - if max_predictions is not None: - self.sliding_window_post_prediction_callback.max_predictions = int(max_predictions) - if multi_label_per_box is not None: - self.sliding_window_post_prediction_callback.multi_label_per_box = bool(multi_label_per_box) - if class_agnostic_nms is not None: - self.sliding_window_post_prediction_callback.class_agnostic_nms = bool(class_agnostic_nms) + + if iou is None: + iou = self._default_nms_iou + if conf is None: + conf = self._default_nms_conf + if nms_top_k is None: + nms_top_k = self._default_nms_top_k + if max_predictions is None: + max_predictions = self._default_max_predictions + if multi_label_per_box is None: + multi_label_per_box = self._default_multi_label_per_box + if class_agnostic_nms is None: + class_agnostic_nms = self._default_class_agnostic_nms + + self.sliding_window_post_prediction_callback = self.get_post_prediction_callback( + iou=float(iou), + conf=float(conf), + nms_top_k=int(nms_top_k), + max_predictions=int(max_predictions), + multi_label_per_box=bool(multi_label_per_box), + class_agnostic_nms=bool(class_agnostic_nms), + ) def get_processing_params(self) -> Optional[Processing]: return self._image_processor @@ -227,18 +244,12 @@ def _get_pipeline( "You must set the dataset processing parameters before calling predict.\n" "Please call `model.set_dataset_processing_params(...)` first." 
) - self.sliding_window_post_prediction_callback.iou = self.sliding_window_post_prediction_callback.iou if iou is None else iou - self.sliding_window_post_prediction_callback.nms_conf = self.sliding_window_post_prediction_callback.nms_conf if conf is None else conf - self.sliding_window_post_prediction_callback.nms_top_k = self.sliding_window_post_prediction_callback.nms_top_k if nms_top_k is None else nms_top_k - self.sliding_window_post_prediction_callback.max_predictions = ( - self.sliding_window_post_prediction_callback.max_predictions if max_predictions is None else max_predictions - ) - self.sliding_window_post_prediction_callback.multi_label_per_box = ( - self.sliding_window_post_prediction_callback.multi_label_per_box if multi_label_per_box is None else multi_label_per_box - ) - self.sliding_window_post_prediction_callback.class_agnostic_nms = ( - self.sliding_window_post_prediction_callback.class_agnostic_nms if class_agnostic_nms is None else class_agnostic_nms - ) + iou = self._default_nms_iou if iou is None else iou + conf = self._default_nms_conf if conf is None else conf + nms_top_k = self._default_nms_top_k if nms_top_k is None else nms_top_k + max_predictions = self._default_max_predictions if max_predictions is None else max_predictions + multi_label_per_box = self._default_multi_label_per_box if multi_label_per_box is None else multi_label_per_box + class_agnostic_nms = self._default_class_agnostic_nms if class_agnostic_nms is None else class_agnostic_nms # Ensure that the image size is divisible by 32. 
if isinstance(self._image_processor, ComposeProcessing) and skip_image_resizing: @@ -251,7 +262,14 @@ def _get_pipeline( pipeline = DetectionPipeline( model=self, image_processor=image_processor, - post_prediction_callback=IdentityPostPredictionCallback(), + post_prediction_callback=self.get_post_prediction_callback( + iou=iou, + conf=conf, + nms_top_k=nms_top_k, + max_predictions=max_predictions, + multi_label_per_box=multi_label_per_box, + class_agnostic_nms=class_agnostic_nms, + ), class_names=self._class_names, fuse_model=fuse_model, fp16=fp16, diff --git a/src/super_gradients/training/pipelines/pipelines.py b/src/super_gradients/training/pipelines/pipelines.py index 8c625c2183..9010bc90b2 100644 --- a/src/super_gradients/training/pipelines/pipelines.py +++ b/src/super_gradients/training/pipelines/pipelines.py @@ -207,6 +207,19 @@ def _generate_prediction_result_single_batch(self, images: Iterable[np.ndarray]) ) # Predict + predictions = self.pass_images_through_model(preprocessed_images) + + # Postprocess + postprocessed_predictions = [] + for image, prediction, processing_metadata in zip(images, predictions, processing_metadatas): + prediction = self.image_processor.postprocess_predictions(predictions=prediction, metadata=processing_metadata) + postprocessed_predictions.append(prediction) + + # Yield results one by one + for image, prediction in zip(images, postprocessed_predictions): + yield self._instantiate_image_prediction(image=image, prediction=prediction) + + def pass_images_through_model(self, preprocessed_images): with eval_mode(self.model), torch.no_grad(), torch.cuda.amp.autocast(enabled=self.fp16): torch_inputs = torch.from_numpy(np.array(preprocessed_images)).to(self.device) torch_inputs = torch_inputs.to(self.dtype) @@ -218,16 +231,7 @@ def _generate_prediction_result_single_batch(self, images: Iterable[np.ndarray]) self._fuse_model(torch_inputs) model_output = self.model(torch_inputs) predictions = self._decode_model_output(model_output, 
model_input=torch_inputs) - - # Postprocess - postprocessed_predictions = [] - for image, prediction, processing_metadata in zip(images, predictions, processing_metadatas): - prediction = self.image_processor.postprocess_predictions(predictions=prediction, metadata=processing_metadata) - postprocessed_predictions.append(prediction) - - # Yield results one by one - for image, prediction in zip(images, postprocessed_predictions): - yield self._instantiate_image_prediction(image=image, prediction=prediction) + return predictions @abstractmethod def _decode_model_output(self, model_output: Union[List, Tuple, torch.Tensor], model_input: np.ndarray) -> List[Prediction]: From f77616c12d678eb2f718a2ed8a44d730c873b120 Mon Sep 17 00:00:00 2001 From: shayaharon Date: Thu, 9 May 2024 12:00:27 +0300 Subject: [PATCH 08/22] rolled back customizable detector, solved pretrained weights setting of proccessing for the wrapper --- .../detection_models/customizable_detector.py | 82 ------------------- .../sliding_window_detector.py | 9 +- .../training/pipelines/pipelines.py | 42 +++++++--- 3 files changed, 37 insertions(+), 96 deletions(-) diff --git a/src/super_gradients/training/models/detection_models/customizable_detector.py b/src/super_gradients/training/models/detection_models/customizable_detector.py index bf9c79b98a..1dc372e2aa 100644 --- a/src/super_gradients/training/models/detection_models/customizable_detector.py +++ b/src/super_gradients/training/models/detection_models/customizable_detector.py @@ -25,7 +25,6 @@ from super_gradients.training.processing.processing import Processing, ComposeProcessing, DetectionAutoPadding from super_gradients.training.utils.detection_utils import DetectionPostPredictionCallback from super_gradients.training.utils.media.image import ImageSource -import torchvision class CustomizableDetector(HasPredict, SgModule): @@ -46,16 +45,6 @@ def __init__( bn_momentum: Optional[float] = None, inplace_act: Optional[bool] = True, in_channels: int = 3, - 
use_sliding_window_validation: bool = True, - tile_size=640, - tile_step=64, - min_tile_threshold=30, - tile_nms_iou: float = 0.7, - tile_nms_conf: float = 0.5, - tile_nms_top_k: int = 1024, - tile_nms_max_predictions=300, - tile_nms_multi_label_per_box=True, - tile_nms_class_agnostic_nms=False, ): """ :param backbone: Backbone configuration. @@ -74,13 +63,6 @@ def __init__( self.bn_momentum = bn_momentum self.inplace_act = inplace_act self.in_channels = in_channels - self.use_sliding_window_validation = use_sliding_window_validation - self.sliding_window_post_prediction_callback = self.get_post_prediction_callback( - iou=0.7, conf=0.25, nms_top_k=1024, max_predictions=300, multi_label_per_box=True, class_agnostic_nms=False - ) - self.tile_size = tile_size - self.tile_step = tile_step - self.min_tile_threshold = min_tile_threshold factory = det_factory.DetectionModulesFactory() # move num_classes into heads params @@ -107,71 +89,7 @@ def __init__( self._default_multi_label_per_box = True self._default_class_agnostic_nms = False - def forward_sliding_window(self, images): - batch_size, _, _, _ = images.shape - all_detections = [[] for _ in range(batch_size)] # Create a list for each image in the batch - - # Generate and process each tile - for img_idx in range(batch_size): - single_image = images[img_idx : img_idx + 1] # Extract each image - tiles = self._generate_tiles(single_image, self.tile_size, self.tile_step) - for tile, (start_x, start_y) in tiles: - tile_detections = self.forward_whole_image(tile) - # Apply local NMS using post_prediction_callback - tile_detections = self.sliding_window_post_prediction_callback(tile_detections) - # Adjust detections to global image coordinates - for img_i_tile_detections in tile_detections: - if len(img_i_tile_detections) > 0: - img_i_tile_detections[:, :4] += torch.tensor([start_x, start_y, start_x, start_y], device=tile.device) - all_detections[img_idx].append(img_i_tile_detections) - - # Concatenate and apply global NMS 
for each image's detections - final_detections = [] - for detections in all_detections: - if detections: - detections = torch.cat(detections, dim=0) - # Apply global NMS - pred_bboxes = detections[:, :4] - pred_cls_conf = detections[:, 4] - pred_cls_label = detections[:, 5] - idx_to_keep = torchvision.ops.boxes.batched_nms( - boxes=pred_bboxes, scores=pred_cls_conf, idxs=pred_cls_label, iou_threshold=self._default_nms_iou - ) - - final_detections.append(detections[idx_to_keep]) - else: - final_detections.append(torch.empty(0, 6).to(images.device)) # Empty tensor for images with no detections - return final_detections - - def _generate_tiles(self, image, tile_size, tile_step): - _, _, h, w = image.shape - tiles = [] - - # Calculate the end points for the grid - max_y = h if (h - tile_size) % tile_step < self.min_tile_threshold else h - (h - tile_size) % tile_step + tile_size - max_x = w if (w - tile_size) % tile_step < self.min_tile_threshold else w - (w - tile_size) % tile_step + tile_size - - # Ensure that the image has enough padding if needed - if max_y > h or max_x > w: - padded_image = torch.zeros((image.shape[0], image.shape[1], max(max_y, h), max(max_x, w)), device=image.device) - padded_image[:, :, :h, :w] = image # Place the original image in the padded one - else: - padded_image = image - - for y in range(0, max_y - tile_size + 1, tile_step): - for x in range(0, max_x - tile_size + 1, tile_step): - tile = padded_image[:, :, y : y + tile_size, x : x + tile_size] - tiles.append((tile, (x, y))) - - return tiles - def forward(self, x): - if self.use_sliding_window_validation and not self.training: - return self.forward_sliding_window(x) - else: - return self.forward_whole_image(x) - - def forward_whole_image(self, x): x = self.backbone(x) x = self.neck(x) return self.heads(x) diff --git a/src/super_gradients/training/models/detection_models/sliding_window_detector.py b/src/super_gradients/training/models/detection_models/sliding_window_detector.py index 
3e16b5554c..fca4eae376 100644 --- a/src/super_gradients/training/models/detection_models/sliding_window_detector.py +++ b/src/super_gradients/training/models/detection_models/sliding_window_detector.py @@ -64,7 +64,7 @@ def __init__( # Processing params self._class_names: Optional[List[str]] = None - self._image_processor: Optional[Processing] = None + self._image_processor: Optional[Processing] = self.model.get_processing_params() self._default_nms_iou: float = tile_nms_iou self._default_nms_conf: float = tile_nms_conf self._default_nms_top_k: int = tile_nms_top_k @@ -72,7 +72,8 @@ def __init__( self._default_multi_label_per_box = tile_nms_multi_label_per_box self._default_class_agnostic_nms = tile_nms_class_agnostic_nms - def forward(self, images): + def forward(self, images: torch.Tensor, sliding_window_post_prediction_callback: DetectionPostPredictionCallback = None): + sliding_window_post_prediction_callback = sliding_window_post_prediction_callback or self.sliding_window_post_prediction_callback batch_size, _, _, _ = images.shape all_detections = [[] for _ in range(batch_size)] # Create a list for each image in the batch @@ -83,7 +84,7 @@ def forward(self, images): for tile, (start_x, start_y) in tiles: tile_detections = self.model(tile) # Apply local NMS using post_prediction_callback - tile_detections = self.sliding_window_post_prediction_callback(tile_detections) + tile_detections = sliding_window_post_prediction_callback(tile_detections) # Adjust detections to global image coordinates for img_i_tile_detections in tile_detections: if len(img_i_tile_detections) > 0: @@ -100,7 +101,7 @@ def forward(self, images): pred_cls_conf = detections[:, 4] pred_cls_label = detections[:, 5] idx_to_keep = torchvision.ops.boxes.batched_nms( - boxes=pred_bboxes, scores=pred_cls_conf, idxs=pred_cls_label, iou_threshold=self.sliding_window_post_prediction_callback.iou + boxes=pred_bboxes, scores=pred_cls_conf, idxs=pred_cls_label, 
iou_threshold=sliding_window_post_prediction_callback.iou ) final_detections.append(detections[idx_to_keep]) diff --git a/src/super_gradients/training/pipelines/pipelines.py b/src/super_gradients/training/pipelines/pipelines.py index 9010bc90b2..a80df16456 100644 --- a/src/super_gradients/training/pipelines/pipelines.py +++ b/src/super_gradients/training/pipelines/pipelines.py @@ -219,20 +219,22 @@ def _generate_prediction_result_single_batch(self, images: Iterable[np.ndarray]) for image, prediction in zip(images, postprocessed_predictions): yield self._instantiate_image_prediction(image=image, prediction=prediction) - def pass_images_through_model(self, preprocessed_images): + def pass_images_through_model(self, preprocessed_images: List[np.ndarray]) -> List[Prediction]: with eval_mode(self.model), torch.no_grad(), torch.cuda.amp.autocast(enabled=self.fp16): - torch_inputs = torch.from_numpy(np.array(preprocessed_images)).to(self.device) - torch_inputs = torch_inputs.to(self.dtype) - - if isinstance(self.model, SupportsInputShapeCheck): - self.model.validate_input_shape(torch_inputs.size()) - - if self.fuse_model: - self._fuse_model(torch_inputs) + torch_inputs = self._prep_inputs_for_model(preprocessed_images) model_output = self.model(torch_inputs) predictions = self._decode_model_output(model_output, model_input=torch_inputs) return predictions + def _prep_inputs_for_model(self, preprocessed_images: List[np.ndarray]) -> torch.Tensor: + torch_inputs = torch.from_numpy(np.array(preprocessed_images)).to(self.device) + torch_inputs = torch_inputs.to(self.dtype) + if isinstance(self.model, SupportsInputShapeCheck): + self.model.validate_input_shape(torch_inputs.size()) + if self.fuse_model: + self._fuse_model(torch_inputs) + return torch_inputs + @abstractmethod def _decode_model_output(self, model_output: Union[List, Tuple, torch.Tensor], model_input: np.ndarray) -> List[Prediction]: """Decode the model outputs, move each prediction to numpy and store it in a 
Prediction object. @@ -328,7 +330,10 @@ def _decode_model_output(self, model_output: Union[List, Tuple, torch.Tensor], m :return: Predicted Bboxes. """ post_nms_predictions = self.post_prediction_callback(model_output, device=self.device) + return self._decode_detection_model_output(model_input, post_nms_predictions) + @staticmethod + def _decode_detection_model_output(model_input: np.ndarray, post_nms_predictions: List[torch.Tensor]) -> List[DetectionPrediction]: predictions = [] for prediction, image in zip(post_nms_predictions, model_input): prediction = prediction if prediction is not None else torch.zeros((0, 6), dtype=torch.float32) @@ -342,7 +347,6 @@ def _decode_model_output(self, model_output: Union[List, Tuple, torch.Tensor], m image_shape=image.shape, ) ) - return predictions def _instantiate_image_prediction(self, image: np.ndarray, prediction: DetectionPrediction) -> ImagePrediction: @@ -366,6 +370,24 @@ def _combine_image_prediction_to_video( return VideoDetectionPrediction(_images_prediction_gen=images_predictions, fps=fps, n_frames=n_images) +class SlidingWindowDetectionPipeline(DetectionPipeline): + def pass_images_through_model(self, preprocessed_images: List[np.ndarray]) -> List[Prediction]: + with eval_mode(self.model), torch.no_grad(), torch.cuda.amp.autocast(enabled=self.fp16): + torch_inputs = self._prep_inputs_for_model(preprocessed_images) + model_output = self.model(torch_inputs, sliding_window_post_prediction_callback=self.post_prediction_callback) + predictions = self._decode_model_output(model_output, model_input=torch_inputs) + return predictions + + def _decode_model_output(self, model_output: Union[List, Tuple, torch.Tensor], model_input: np.ndarray) -> List[DetectionPrediction]: + """Decode the model output, by applying post prediction callback. This includes NMS. + + :param model_output: Direct output of the model, without any post-processing. + :param model_input: Model input (i.e. images after preprocessing). 
+ :return: Predicted Bboxes. + """ + return self._decode_detection_model_output(model_input, model_output) + + class PoseEstimationPipeline(Pipeline): """Pipeline specifically designed for pose estimation tasks. The pipeline includes loading images, preprocessing, prediction, and postprocessing. From dce1b4a1c52b495211b2405e2fd4918bd25b180b Mon Sep 17 00:00:00 2001 From: shayaharon Date: Thu, 9 May 2024 12:04:36 +0300 Subject: [PATCH 09/22] temp cleanup --- src/super_gradients/training/processing/processing.py | 4 ++-- src/super_gradients/training/utils/detection_utils.py | 9 --------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/src/super_gradients/training/processing/processing.py b/src/super_gradients/training/processing/processing.py index 7c47a42468..0fb13f481f 100644 --- a/src/super_gradients/training/processing/processing.py +++ b/src/super_gradients/training/processing/processing.py @@ -964,8 +964,8 @@ def default_yolo_nas_coco_processing_params() -> dict: image_processor = ComposeProcessing( [ - DetectionLongestMaxSizeRescale(output_shape=(1280, 1280)), - DetectionCenterPadding(output_shape=(1280, 1280), pad_value=114), + DetectionLongestMaxSizeRescale(output_shape=(636, 636)), + DetectionCenterPadding(output_shape=(640, 640), pad_value=114), StandardizeImage(max_value=255.0), ImagePermute(permutation=(2, 0, 1)), ] diff --git a/src/super_gradients/training/utils/detection_utils.py b/src/super_gradients/training/utils/detection_utils.py index 844acd2829..0d528d837f 100755 --- a/src/super_gradients/training/utils/detection_utils.py +++ b/src/super_gradients/training/utils/detection_utils.py @@ -228,15 +228,6 @@ def forward(self, x, device: str = None): raise NotImplementedError -class IdentityPostPredictionCallback(DetectionPostPredictionCallback): - """ - Detection Post Prediction callback that simply returns the input - """ - - def forward(self, x, device: str = None): - return x - - class IouThreshold(tuple, Enum): MAP_05 = (0.5, 0.5) 
MAP_05_TO_095 = (0.5, 0.95) From 6c64ddd04573bacbd6d2d748dd1d05775b5d3aca Mon Sep 17 00:00:00 2001 From: shayaharon Date: Thu, 9 May 2024 12:20:32 +0300 Subject: [PATCH 10/22] support for fuse model in predict --- .../models/detection_models/sliding_window_detector.py | 7 +++++-- src/super_gradients/training/pipelines/pipelines.py | 7 +++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/super_gradients/training/models/detection_models/sliding_window_detector.py b/src/super_gradients/training/models/detection_models/sliding_window_detector.py index fca4eae376..10e7f79a63 100644 --- a/src/super_gradients/training/models/detection_models/sliding_window_detector.py +++ b/src/super_gradients/training/models/detection_models/sliding_window_detector.py @@ -2,7 +2,7 @@ from functools import lru_cache import torch - +from torch import nn from super_gradients.common.decorators.factory_decorator import resolve_param from super_gradients.common.factories.processing_factory import ProcessingFactory from super_gradients.module_interfaces import HasPredict @@ -15,7 +15,7 @@ from super_gradients.training.utils.detection_utils import DetectionPostPredictionCallback -class SlidingWindowInferenceDetectionWrapper(HasPredict, SgModule): +class SlidingWindowInferenceDetectionWrapper(HasPredict, nn.Module): """ A customizable detector with backbone -> neck -> heads Each submodule with its parameters must be defined explicitly. 
@@ -361,3 +361,6 @@ def predict_webcam( fp16=fp16, ) pipeline.predict_webcam() + + def get_input_channels(self) -> int: + return self.model.get_input_channels() diff --git a/src/super_gradients/training/pipelines/pipelines.py b/src/super_gradients/training/pipelines/pipelines.py index a80df16456..6f059d6de0 100644 --- a/src/super_gradients/training/pipelines/pipelines.py +++ b/src/super_gradients/training/pipelines/pipelines.py @@ -387,6 +387,13 @@ def _decode_model_output(self, model_output: Union[List, Tuple, torch.Tensor], m """ return self._decode_detection_model_output(model_input, model_output) + def _fuse_model(self, input_example: torch.Tensor): + logger.info("Fusing some of the model's layers. If this takes too much memory, you can deactivate it by setting `fuse_model=False`") + self.model = copy.deepcopy(self.model) + self.model.eval() + self.model.model.prep_model_for_conversion(input_size=input_example.shape[-2:]) + self.fuse_model = False + class PoseEstimationPipeline(Pipeline): """Pipeline specifically designed for pose estimation tasks. 
From 2cdf4ff80f71d80a572310937458e81d3d623779 Mon Sep 17 00:00:00 2001 From: shayaharon Date: Thu, 9 May 2024 13:09:40 +0300 Subject: [PATCH 11/22] example added for predict --- .../examples/predict/detection_predict.py | 2 +- .../sliding_sindow_detection_predict.py | 20 +++++++++++++++++++ .../abstract_forward_wrapper_model.py | 5 +---- ...iding_window_detection_forward_wrapper.py} | 16 ++++++++------- .../detection_models/customizable_detector.py | 3 +++ 5 files changed, 34 insertions(+), 12 deletions(-) create mode 100644 src/super_gradients/examples/predict/sliding_sindow_detection_predict.py rename src/super_gradients/training/{models/detection_models/sliding_window_detector.py => forward_wrappers/sliding_window_detection_forward_wrapper.py} (96%) diff --git a/src/super_gradients/examples/predict/detection_predict.py b/src/super_gradients/examples/predict/detection_predict.py index 60b4b2b633..735871d5f4 100644 --- a/src/super_gradients/examples/predict/detection_predict.py +++ b/src/super_gradients/examples/predict/detection_predict.py @@ -3,7 +3,7 @@ from super_gradients.training import models # Note that currently only YoloX, PPYoloE and YOLO-NAS are supported. -model = models.get(Models.YOLO_NAS_S, pretrained_weights="coco") +model = models.get(Models.YOLO_NAS_L, pretrained_weights="coco") # We want to use cuda if available to speed up inference. model = model.to("cuda" if torch.cuda.is_available() else "cpu") diff --git a/src/super_gradients/examples/predict/sliding_sindow_detection_predict.py b/src/super_gradients/examples/predict/sliding_sindow_detection_predict.py new file mode 100644 index 0000000000..eee3daee57 --- /dev/null +++ b/src/super_gradients/examples/predict/sliding_sindow_detection_predict.py @@ -0,0 +1,20 @@ +import torch +from super_gradients.common.object_names import Models +from super_gradients.training import models + + +# Note that currently only YoloX, PPYoloE and YOLO-NAS are supported. 
+from super_gradients.training.forward_wrappers.sliding_window_detection_forward_wrapper import SlidingWindowInferenceDetectionWrapper + +model = models.get(Models.YOLO_NAS_S, pretrained_weights="coco") + +# We want to use cuda if available to speed up inference. +model = model.to("cuda" if torch.cuda.is_available() else "cpu") + +model = SlidingWindowInferenceDetectionWrapper(model) + +predictions = model.predict( + "https://images.pexels.com/photos/7968254/pexels-photo-7968254.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", skip_image_resizing=True +) +predictions.show() +predictions.save(output_path="2.jpg") # Save in working directory diff --git a/src/super_gradients/training/forward_wrappers/abstract_forward_wrapper_model.py b/src/super_gradients/training/forward_wrappers/abstract_forward_wrapper_model.py index 68531b2d6d..9d1b160c56 100644 --- a/src/super_gradients/training/forward_wrappers/abstract_forward_wrapper_model.py +++ b/src/super_gradients/training/forward_wrappers/abstract_forward_wrapper_model.py @@ -6,9 +6,6 @@ class AbstractForwardWrapperModel(abc.ABC): - def __init__(self, model: nn.Module): - self.model = model - @abstractmethod - def __call__(self, inputs: torch.Tensor): + def __call__(self, images: torch.Tensor, model: nn.Module, **kwargs): raise NotImplementedError diff --git a/src/super_gradients/training/models/detection_models/sliding_window_detector.py b/src/super_gradients/training/forward_wrappers/sliding_window_detection_forward_wrapper.py similarity index 96% rename from src/super_gradients/training/models/detection_models/sliding_window_detector.py rename to src/super_gradients/training/forward_wrappers/sliding_window_detection_forward_wrapper.py index 10e7f79a63..afeb1c6bfa 100644 --- a/src/super_gradients/training/models/detection_models/sliding_window_detector.py +++ b/src/super_gradients/training/forward_wrappers/sliding_window_detection_forward_wrapper.py @@ -6,16 +6,17 @@ from 
super_gradients.common.decorators.factory_decorator import resolve_param from super_gradients.common.factories.processing_factory import ProcessingFactory from super_gradients.module_interfaces import HasPredict +from super_gradients.training.forward_wrappers.abstract_forward_wrapper_model import AbstractForwardWrapperModel from super_gradients.training.models.sg_module import SgModule from super_gradients.training.utils.predict import ImagesDetectionPrediction -from super_gradients.training.pipelines.pipelines import DetectionPipeline +from super_gradients.training.pipelines.pipelines import SlidingWindowDetectionPipeline from super_gradients.training.processing.processing import Processing, ComposeProcessing, DetectionAutoPadding from super_gradients.training.utils.media.image import ImageSource import torchvision from super_gradients.training.utils.detection_utils import DetectionPostPredictionCallback -class SlidingWindowInferenceDetectionWrapper(HasPredict, nn.Module): +class SlidingWindowInferenceDetectionWrapper(HasPredict, AbstractForwardWrapperModel, nn.Module): """ A customizable detector with backbone -> neck -> heads Each submodule with its parameters must be defined explicitly. 
@@ -63,7 +64,7 @@ def __init__( self.min_tile_threshold = min_tile_threshold # Processing params - self._class_names: Optional[List[str]] = None + self._class_names: Optional[List[str]] = self.model.get_class_names() self._image_processor: Optional[Processing] = self.model.get_processing_params() self._default_nms_iou: float = tile_nms_iou self._default_nms_conf: float = tile_nms_conf @@ -72,7 +73,8 @@ def __init__( self._default_multi_label_per_box = tile_nms_multi_label_per_box self._default_class_agnostic_nms = tile_nms_class_agnostic_nms - def forward(self, images: torch.Tensor, sliding_window_post_prediction_callback: DetectionPostPredictionCallback = None): + def __call__(self, images: torch.Tensor, model: nn.Module = None, sliding_window_post_prediction_callback: DetectionPostPredictionCallback = None): + sliding_window_post_prediction_callback = sliding_window_post_prediction_callback or self.sliding_window_post_prediction_callback batch_size, _, _, _ = images.shape all_detections = [[] for _ in range(batch_size)] # Create a list for each image in the batch @@ -101,7 +103,7 @@ def forward(self, images: torch.Tensor, sliding_window_post_prediction_callback: pred_cls_conf = detections[:, 4] pred_cls_label = detections[:, 5] idx_to_keep = torchvision.ops.boxes.batched_nms( - boxes=pred_bboxes, scores=pred_cls_conf, idxs=pred_cls_label, iou_threshold=sliding_window_post_prediction_callback.iou + boxes=pred_bboxes, scores=pred_cls_conf, idxs=pred_cls_label, iou_threshold=sliding_window_post_prediction_callback.score_threshold ) final_detections.append(detections[idx_to_keep]) @@ -224,7 +226,7 @@ def _get_pipeline( multi_label_per_box: Optional[bool] = None, class_agnostic_nms: Optional[bool] = None, fp16: bool = True, - ) -> DetectionPipeline: + ) -> SlidingWindowDetectionPipeline: """Instantiate the prediction pipeline of this model. :param iou: (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used. 
@@ -260,7 +262,7 @@ def _get_pipeline( else: image_processor = self._image_processor - pipeline = DetectionPipeline( + pipeline = SlidingWindowDetectionPipeline( model=self, image_processor=image_processor, post_prediction_callback=self.get_post_prediction_callback( diff --git a/src/super_gradients/training/models/detection_models/customizable_detector.py b/src/super_gradients/training/models/detection_models/customizable_detector.py index 1dc372e2aa..bcd625a1e4 100644 --- a/src/super_gradients/training/models/detection_models/customizable_detector.py +++ b/src/super_gradients/training/models/detection_models/customizable_detector.py @@ -197,6 +197,9 @@ def set_dataset_processing_params( def get_processing_params(self) -> Optional[Processing]: return self._image_processor + def get_class_names(self) -> Optional[List[str]]: + return self._class_names + @lru_cache(maxsize=1) def _get_pipeline( self, From 80d81e91e55144cabc432ec81ae855b30403b52c Mon Sep 17 00:00:00 2001 From: shayaharon Date: Thu, 9 May 2024 14:02:30 +0300 Subject: [PATCH 12/22] added support for forward wrappers in trainer --- .../factories/forward_wrappers_factory.py | 7 +++++++ .../common/registry/registry.py | 4 ++++ .../default_train_params.yaml | 2 ++ ...liding_window_detection_forward_wrapper.py | 19 +++++++++++-------- src/super_gradients/training/params.py | 4 +++- .../training/sg_trainer/sg_trainer.py | 10 +++++++++- 6 files changed, 36 insertions(+), 10 deletions(-) create mode 100644 src/super_gradients/common/factories/forward_wrappers_factory.py diff --git a/src/super_gradients/common/factories/forward_wrappers_factory.py b/src/super_gradients/common/factories/forward_wrappers_factory.py new file mode 100644 index 0000000000..a03539a000 --- /dev/null +++ b/src/super_gradients/common/factories/forward_wrappers_factory.py @@ -0,0 +1,7 @@ +from super_gradients.common.factories.base_factory import BaseFactory +from super_gradients.common.registry.registry import FORWARD_WRAPPERS + + +class 
ForwardWrappersFactory(BaseFactory): + def __init__(self): + super().__init__(FORWARD_WRAPPERS) diff --git a/src/super_gradients/common/registry/registry.py b/src/super_gradients/common/registry/registry.py index e303f3766f..2e4bde6f1a 100644 --- a/src/super_gradients/common/registry/registry.py +++ b/src/super_gradients/common/registry/registry.py @@ -160,6 +160,10 @@ def warn_if_deprecated(name: str, registry: dict): ALL_COLLATE_FUNCTIONS = {} register_collate_function = create_register_decorator(registry=ALL_COLLATE_FUNCTIONS) +FORWARD_WRAPPERS = {} +register_forward_wrapper = create_register_decorator(registry=FORWARD_WRAPPERS) + + SAMPLERS = { Samplers.DISTRIBUTED: torch.utils.data.DistributedSampler, Samplers.SEQUENTIAL: torch.utils.data.SequentialSampler, diff --git a/src/super_gradients/recipes/training_hyperparams/default_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/default_train_params.yaml index 6b560df81c..2164c632e6 100644 --- a/src/super_gradients/recipes/training_hyperparams/default_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/default_train_params.yaml @@ -109,6 +109,8 @@ max_train_batches: # For debug- when not None- will break out of inner train lo max_valid_batches: # For debug- when not None- will break out of inner valid loop # (i.e iterating over valid_loader) when reaching this number of batches. +validation_forward_wrapper: None # callable that expects - images: torch.Tensor, model: nn.Module and will replace the model's forward during validation. 
+ sg_logger: base_sg_logger sg_logger_params: tb_files_user_prompt: False # Asks User for Tensorboard Deletion Prompt diff --git a/src/super_gradients/training/forward_wrappers/sliding_window_detection_forward_wrapper.py b/src/super_gradients/training/forward_wrappers/sliding_window_detection_forward_wrapper.py index afeb1c6bfa..28d3f17e6f 100644 --- a/src/super_gradients/training/forward_wrappers/sliding_window_detection_forward_wrapper.py +++ b/src/super_gradients/training/forward_wrappers/sliding_window_detection_forward_wrapper.py @@ -5,9 +5,10 @@ from torch import nn from super_gradients.common.decorators.factory_decorator import resolve_param from super_gradients.common.factories.processing_factory import ProcessingFactory +from super_gradients.common.registry.registry import register_forward_wrapper from super_gradients.module_interfaces import HasPredict from super_gradients.training.forward_wrappers.abstract_forward_wrapper_model import AbstractForwardWrapperModel -from super_gradients.training.models.sg_module import SgModule +from super_gradients.training.models import CustomizableDetector from super_gradients.training.utils.predict import ImagesDetectionPrediction from super_gradients.training.pipelines.pipelines import SlidingWindowDetectionPipeline from super_gradients.training.processing.processing import Processing, ComposeProcessing, DetectionAutoPadding @@ -16,6 +17,7 @@ from super_gradients.training.utils.detection_utils import DetectionPostPredictionCallback +@register_forward_wrapper("SlidingWindowInferenceDetectionWrapper") class SlidingWindowInferenceDetectionWrapper(HasPredict, AbstractForwardWrapperModel, nn.Module): """ A customizable detector with backbone -> neck -> heads @@ -25,7 +27,7 @@ class SlidingWindowInferenceDetectionWrapper(HasPredict, AbstractForwardWrapperM def __init__( self, - model: SgModule, + model: CustomizableDetector, tile_size=640, tile_step=64, min_tile_threshold=30, @@ -73,18 +75,19 @@ def __init__( 
self._default_multi_label_per_box = tile_nms_multi_label_per_box self._default_class_agnostic_nms = tile_nms_class_agnostic_nms - def __call__(self, images: torch.Tensor, model: nn.Module = None, sliding_window_post_prediction_callback: DetectionPostPredictionCallback = None): - + def __call__(self, inputs: torch.Tensor, model: nn.Module = None, sliding_window_post_prediction_callback: DetectionPostPredictionCallback = None): + model = model or self.model sliding_window_post_prediction_callback = sliding_window_post_prediction_callback or self.sliding_window_post_prediction_callback - batch_size, _, _, _ = images.shape + + batch_size, _, _, _ = inputs.shape all_detections = [[] for _ in range(batch_size)] # Create a list for each image in the batch # Generate and process each tile for img_idx in range(batch_size): - single_image = images[img_idx : img_idx + 1] # Extract each image + single_image = inputs[img_idx : img_idx + 1] # Extract each image tiles = self._generate_tiles(single_image, self.tile_size, self.tile_step) for tile, (start_x, start_y) in tiles: - tile_detections = self.model(tile) + tile_detections = model(tile) # Apply local NMS using post_prediction_callback tile_detections = sliding_window_post_prediction_callback(tile_detections) # Adjust detections to global image coordinates @@ -108,7 +111,7 @@ def __call__(self, images: torch.Tensor, model: nn.Module = None, sliding_window final_detections.append(detections[idx_to_keep]) else: - final_detections.append(torch.empty(0, 6).to(images.device)) # Empty tensor for images with no detections + final_detections.append(torch.empty(0, 6).to(inputs.device)) # Empty tensor for images with no detections return final_detections def _generate_tiles(self, image, tile_size, tile_step): diff --git a/src/super_gradients/training/params.py b/src/super_gradients/training/params.py index 30f3c90c03..4df63abffa 100755 --- a/src/super_gradients/training/params.py +++ b/src/super_gradients/training/params.py @@ -81,8 
+81,10 @@ "options": None, # A dictionary of options to pass to the backend. "disable": False, # Turn torch.compile() into a no-op for testing }, # torch.compile options from https://pytorch.org/docs/stable/generated/torch.compile.html - "finetune": False # Whether to freeze a fixed part of the model (supported only for models that implement + "finetune": False, # Whether to freeze a fixed part of the model (supported only for models that implement # get_finetune_lr_dict, see SgModule.get_finetune_lr_dict. Tailored for each model class.) + "validation_forward_wrapper": None # callable that expects - inputs: torch.Tensor, model: nn.Module and will + # replace the model's forward during validation. } DEFAULT_OPTIMIZER_PARAMS_SGD = {"weight_decay": 1e-4, "momentum": 0.9} diff --git a/src/super_gradients/training/sg_trainer/sg_trainer.py b/src/super_gradients/training/sg_trainer/sg_trainer.py index 7ba08998ae..4eb9658375 100755 --- a/src/super_gradients/training/sg_trainer/sg_trainer.py +++ b/src/super_gradients/training/sg_trainer/sg_trainer.py @@ -28,6 +28,7 @@ validate_run_id, get_checkpoints_dir_path, ) +from super_gradients.common.factories.forward_wrappers_factory import ForwardWrappersFactory from super_gradients.module_interfaces import HasPreprocessingParams, HasPredict from super_gradients.modules.repvgg_block import fuse_repvgg_blocks_residual_branches from super_gradients.import_utils import import_pytorch_quantization_or_install @@ -170,6 +171,7 @@ def __init__(self, experiment_name: str, device: Optional[str] = None, multi_gpu self.phase_callbacks = None self.checkpoint_params = None self.pre_prediction_callback = None + self.validation_forward_wrapper = None # SET THE DEFAULT PROPERTIES self.half_precision = False @@ -1417,6 +1419,8 @@ def get_finetune_lr_dict(self, lr: float) -> Dict[str, float]: self.pre_prediction_callback = CallbacksFactory().get(self.training_params.pre_prediction_callback) + self.validation_forward_wrapper = 
ForwardWrappersFactory().get(self.training_params.validation_forward_wrapper) + self.training_params.mixed_precision = self._initialize_mixed_precision(self.training_params.mixed_precision) self.ckpt_best_name = self.training_params.ckpt_best_name @@ -2285,7 +2289,11 @@ def evaluate( else: self.phase_callback_handler.on_test_batch_start(context) - output = self.net(inputs) + if self.self.validation_forward_wrapper is not None: + output = self.validation_forward_wrapper(inputs, self.net) + else: + output = self.net(inputs) + context.update_context(preds=output) if self.criterion is not None: From bf809eb481cdff519c23079eb082a8854491dfd2 Mon Sep 17 00:00:00 2001 From: shayaharon Date: Thu, 9 May 2024 14:36:44 +0300 Subject: [PATCH 13/22] added test for validation forward wrapper --- .../training/sg_trainer/sg_trainer.py | 4 +- tests/deci_core_unit_test_suite_runner.py | 2 + tests/unit_tests/__init__.py | 2 + tests/unit_tests/forward_wrapper_test.py | 79 +++++++++++++++++++ 4 files changed, 85 insertions(+), 2 deletions(-) create mode 100644 tests/unit_tests/forward_wrapper_test.py diff --git a/src/super_gradients/training/sg_trainer/sg_trainer.py b/src/super_gradients/training/sg_trainer/sg_trainer.py index 4eb9658375..b0bf64192b 100755 --- a/src/super_gradients/training/sg_trainer/sg_trainer.py +++ b/src/super_gradients/training/sg_trainer/sg_trainer.py @@ -2289,8 +2289,8 @@ def evaluate( else: self.phase_callback_handler.on_test_batch_start(context) - if self.self.validation_forward_wrapper is not None: - output = self.validation_forward_wrapper(inputs, self.net) + if self.validation_forward_wrapper is not None: + output = self.validation_forward_wrapper(inputs=inputs, model=self.net) else: output = self.net(inputs) diff --git a/tests/deci_core_unit_test_suite_runner.py b/tests/deci_core_unit_test_suite_runner.py index 27a92f1079..fda112e5ee 100644 --- a/tests/deci_core_unit_test_suite_runner.py +++ b/tests/deci_core_unit_test_suite_runner.py @@ -30,6 +30,7 @@ 
TestClassificationAdapter, TestDetectionAdapter, TestSegmentationAdapter, + TestForwardWrapper, ) from tests.end_to_end_tests import TestTrainer from tests.unit_tests.depth_estimation_dataset_test import DepthEstimationDatasetTest @@ -192,6 +193,7 @@ def _add_modules_to_unit_tests_suite(self): self.unit_tests_suite.addTest(self.test_loader.loadTestsFromModule(ClassBalancerTest)) self.unit_tests_suite.addTest(self.test_loader.loadTestsFromModule(ClassBalancedSamplerTest)) self.unit_tests_suite.addTest(self.test_loader.loadTestsFromModule(TestSegmentationModelExport)) + self.unit_tests_suite.addTest(self.test_loader.loadTestsFromModule(TestForwardWrapper)) def _add_modules_to_end_to_end_tests_suite(self): """ diff --git a/tests/unit_tests/__init__.py b/tests/unit_tests/__init__.py index c43384006f..9a39e2f069 100644 --- a/tests/unit_tests/__init__.py +++ b/tests/unit_tests/__init__.py @@ -2,6 +2,7 @@ from tests.unit_tests.crash_tips_test import CrashTipTest from tests.unit_tests.double_training_test import CallTrainTwiceTest from tests.unit_tests.factories_test import FactoriesTest +from tests.unit_tests.forward_wrapper_test import TestForwardWrapper from tests.unit_tests.optimizer_params_override_test import TrainOptimizerParamsOverride from tests.unit_tests.resume_training_test import ResumeTrainingTest from tests.unit_tests.strictload_enum_test import StrictLoadEnumTest @@ -63,4 +64,5 @@ "TestClassificationAdapter", "TestDetectionAdapter", "TestSegmentationAdapter", + "TestForwardWrapper", ] diff --git a/tests/unit_tests/forward_wrapper_test.py b/tests/unit_tests/forward_wrapper_test.py new file mode 100644 index 0000000000..d2682779f6 --- /dev/null +++ b/tests/unit_tests/forward_wrapper_test.py @@ -0,0 +1,79 @@ +import unittest + +from super_gradients.training import Trainer +from super_gradients.training.dataloaders.dataloaders import classification_test_dataloader +from super_gradients.training.metrics import Accuracy +from super_gradients.training.models 
import LeNet +from super_gradients.training.utils.callbacks import PhaseContext, Callback +import torch + + +class OutputsCollectorCallback(Callback): + def __init__(self): + self.validation_outputs = [] + self.train_outputs = [] + + def on_validation_batch_end(self, context: PhaseContext) -> None: + self.validation_outputs.append(context.preds) + + def on_train_batch_end(self, context: PhaseContext) -> None: + self.train_outputs.append(context.preds) + + +class DummyForwardWrapper: + def __call__(self, inputs: torch.Tensor, model: torch.nn.Module): + return torch.ones_like(model(inputs)) + + +def compare_tensor_lists(list1, list2): + if len(list1) != len(list2): + return False + + # Move tensors to CPU + list1 = [t.cpu() for t in list1] + list2 = [t.cpu() for t in list2] + + for tensor1, tensor2 in zip(list1, list2): + if not torch.all(torch.eq(tensor1, tensor2)): + return False + return True + + +class TestForwardWrapper(unittest.TestCase): + def test_train_with_validation_forward_wrapper(self): + # Define Model + net = LeNet() + trainer = Trainer("test_train_with_validation_forward_wrapper") + output_collector = OutputsCollectorCallback() + validation_forward_wrapper = DummyForwardWrapper() + train_params = { + "max_epochs": 1, + "initial_lr": 0.1, + "loss": "CrossEntropyLoss", + "optimizer": "SGD", + "criterion_params": {}, + "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, + "train_metrics_list": [Accuracy()], + "valid_metrics_list": [Accuracy()], + "metric_to_watch": "Accuracy", + "greater_metric_to_watch_is_better": True, + "ema": False, + "phase_callbacks": [output_collector], + "warmup_mode": "LinearEpochLRWarmup", + "validation_forward_wrapper": validation_forward_wrapper, + "average_best_models": False, + } + + expected_outputs = [torch.ones(4, 10)] + trainer.train( + model=net, + training_params=train_params, + train_loader=classification_test_dataloader(batch_size=4), + valid_loader=classification_test_dataloader(batch_size=4), + ) + 
self.assertTrue(compare_tensor_lists(expected_outputs, output_collector.validation_outputs)) + self.assertFalse(compare_tensor_lists(expected_outputs, output_collector.train_outputs)) + + +if __name__ == "__main__": + unittest.main() From 877e0164a28d92563a2d5ffcdc1f1c079bc43239 Mon Sep 17 00:00:00 2001 From: shayaharon Date: Thu, 9 May 2024 15:04:15 +0300 Subject: [PATCH 14/22] added option for None as post prediction callback in DetectionMetrics --- ...liding_window_detection_forward_wrapper.py | 41 +++++++++++-------- .../training/metrics/detection_metrics.py | 3 +- .../detection_sliding_window_wrapper_test.py | 10 +++++ tests/unit_tests/forward_wrapper_test.py | 18 ++++++++ 4 files changed, 53 insertions(+), 19 deletions(-) create mode 100644 tests/unit_tests/detection_sliding_window_wrapper_test.py diff --git a/src/super_gradients/training/forward_wrappers/sliding_window_detection_forward_wrapper.py b/src/super_gradients/training/forward_wrappers/sliding_window_detection_forward_wrapper.py index 28d3f17e6f..33ab3632d7 100644 --- a/src/super_gradients/training/forward_wrappers/sliding_window_detection_forward_wrapper.py +++ b/src/super_gradients/training/forward_wrappers/sliding_window_detection_forward_wrapper.py @@ -27,9 +27,9 @@ class SlidingWindowInferenceDetectionWrapper(HasPredict, AbstractForwardWrapperM def __init__( self, - model: CustomizableDetector, - tile_size=640, - tile_step=64, + tile_size: int, + tile_step: int, + model: Optional[CustomizableDetector], min_tile_threshold=30, tile_nms_iou: float = 0.7, tile_nms_conf: float = 0.5, @@ -78,7 +78,8 @@ def __init__( def __call__(self, inputs: torch.Tensor, model: nn.Module = None, sliding_window_post_prediction_callback: DetectionPostPredictionCallback = None): model = model or self.model sliding_window_post_prediction_callback = sliding_window_post_prediction_callback or self.sliding_window_post_prediction_callback - + if None in [model, sliding_window_post_prediction_callback]: + raise 
RuntimeError("model and sliding_window_post_prediction_callback must be passed when model is not " "passed in __init__.") batch_size, _, _, _ = inputs.shape all_detections = [[] for _ in range(batch_size)] # Create a list for each image in the batch @@ -152,14 +153,17 @@ def get_post_prediction_callback( If False NMS is performed separately for each class. :return: """ - return self.model.get_post_prediction_callback( - conf=conf, - iou=iou, - nms_top_k=nms_top_k, - max_predictions=max_predictions, - multi_label_per_box=multi_label_per_box, - class_agnostic_nms=class_agnostic_nms, - ) + if self.model: + return self.model.get_post_prediction_callback( + conf=conf, + iou=iou, + nms_top_k=nms_top_k, + max_predictions=max_predictions, + multi_label_per_box=multi_label_per_box, + class_agnostic_nms=class_agnostic_nms, + ) + else: + return None @resolve_param("image_processor", ProcessingFactory()) def set_dataset_processing_params( @@ -177,10 +181,10 @@ def set_dataset_processing_params( :param class_names: (Optional) Names of the dataset the model was trained on. :param image_processor: (Optional) Image processing objects to reproduce the dataset preprocessing used for training. - :param iou: (Optional) IoU threshold for the nms algorithm + :param iou: (Optional) IoU threshold for the nms algorithm applied. :param conf: (Optional) Below the confidence threshold, prediction are discarded - :param nms_top_k: (Optional) The maximum number of detections to consider for NMS. - :param max_predictions: (Optional) The maximum number of detections to return. + :param nms_top_k: (Optional) The maximum number of detections to consider for NMS in each tile. + :param max_predictions: (Optional) The maximum number of detections to return in each tile. :param multi_label_per_box: (Optional) If True, each anchor can produce multiple labels of different classes. If False, each anchor can produce only one label of the class with the highest score. 
:param class_agnostic_nms: (Optional) If True, perform class-agnostic NMS (i.e IoU of boxes of different classes is checked). @@ -232,13 +236,14 @@ def _get_pipeline( ) -> SlidingWindowDetectionPipeline: """Instantiate the prediction pipeline of this model. - :param iou: (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used. + :param iou: (Optional) IoU threshold for the nms algorithm. + If None, the default value associated to the training is used. :param conf: (Optional) Below the confidence threshold, prediction are discarded. If None, the default value associated to the training is used. :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. :param skip_image_resizing: If True, the image processor will not resize the images. - :param nms_top_k: (Optional) The maximum number of detections to consider for NMS. - :param max_predictions: (Optional) The maximum number of detections to return. + :param nms_top_k: (Optional) The maximum number of detections to consider for NMS for each tile. + :param max_predictions: (Optional) The maximum number of detections to return for each tile. :param multi_label_per_box: (Optional) If True, each anchor can produce multiple labels of different classes. If False, each anchor can produce only one label of the class with the highest score. :param class_agnostic_nms: (Optional) If True, perform class-agnostic NMS (i.e IoU of boxes of different classes is checked). 
diff --git a/src/super_gradients/training/metrics/detection_metrics.py b/src/super_gradients/training/metrics/detection_metrics.py index 10beb2a6c8..fd99451e91 100755 --- a/src/super_gradients/training/metrics/detection_metrics.py +++ b/src/super_gradients/training/metrics/detection_metrics.py @@ -179,7 +179,8 @@ def update(self, preds, target: torch.Tensor, device: str, inputs: torch.tensor, targets = target.clone() crowd_targets = torch.zeros(size=(0, 6), device=device) if crowd_targets is None else crowd_targets.clone() - preds = self.post_prediction_callback(preds, device=device) + if self.post_prediction_callback is not None: + preds = self.post_prediction_callback(preds, device=device) new_matching_info = compute_detection_matching( preds, diff --git a/tests/unit_tests/detection_sliding_window_wrapper_test.py b/tests/unit_tests/detection_sliding_window_wrapper_test.py new file mode 100644 index 0000000000..a74ae6aeb7 --- /dev/null +++ b/tests/unit_tests/detection_sliding_window_wrapper_test.py @@ -0,0 +1,10 @@ +import unittest + + +class TestSlidingForwardWrapper(unittest.TestCase): + def test_something(self): + self.assertEqual(True, False) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit_tests/forward_wrapper_test.py b/tests/unit_tests/forward_wrapper_test.py index d2682779f6..106d48ca73 100644 --- a/tests/unit_tests/forward_wrapper_test.py +++ b/tests/unit_tests/forward_wrapper_test.py @@ -9,6 +9,10 @@ class OutputsCollectorCallback(Callback): + """ + Simple callback that collects validation and train outputs for testing + """ + def __init__(self): self.validation_outputs = [] self.train_outputs = [] @@ -21,11 +25,25 @@ def on_train_batch_end(self, context: PhaseContext) -> None: class DummyForwardWrapper: + """ + Simple foward wrapper that ignores the model output and returns output of the same shape as the model filled with ones. 
+ """ + def __call__(self, inputs: torch.Tensor, model: torch.nn.Module): return torch.ones_like(model(inputs)) def compare_tensor_lists(list1, list2): + """ + Compares two lists of PyTorch tensors for element-wise equality, ensuring all operations are performed on the CPU. + + :param list1: The first list of tensors. Each element must be a PyTorch Tensor. + :param list2: The second list of tensors. Each element must be a PyTorch Tensor. + :return: True if all tensors in both lists are equal element-wise, False otherwise. + + Note: This function explicitly moves all tensors to the CPU before comparison, + making it suitable for environments where GPU resources are not desired or available. + """ if len(list1) != len(list2): return False From 8192a15e530f85af100babe06b725552a8de47c4 Mon Sep 17 00:00:00 2001 From: shayaharon Date: Wed, 15 May 2024 13:31:17 +0300 Subject: [PATCH 15/22] wip adding set_model before using wrapper --- .../abstract_forward_wrapper_model.py | 30 ++++++++-- ...liding_window_detection_forward_wrapper.py | 56 +++++++++++++------ .../training/metrics/detection_metrics.py | 2 + .../detection_sliding_window_wrapper_test.py | 50 ++++++++++++++++- 4 files changed, 112 insertions(+), 26 deletions(-) diff --git a/src/super_gradients/training/forward_wrappers/abstract_forward_wrapper_model.py b/src/super_gradients/training/forward_wrappers/abstract_forward_wrapper_model.py index 9d1b160c56..851934ba20 100644 --- a/src/super_gradients/training/forward_wrappers/abstract_forward_wrapper_model.py +++ b/src/super_gradients/training/forward_wrappers/abstract_forward_wrapper_model.py @@ -1,11 +1,31 @@ import abc - import torch from torch import nn -from abc import abstractmethod class AbstractForwardWrapperModel(abc.ABC): - @abstractmethod - def __call__(self, images: torch.Tensor, model: nn.Module, **kwargs): - raise NotImplementedError + def __init__(self, model: nn.Module = None): + """ + Initialize the AbstractForwardWrapperModel with an optional PyTorch 
model. + + :param model: An instance of nn.Module to be wrapped by this class, default is None. + """ + self.model = model + + @abc.abstractmethod + def __call__(self, inputs: torch.Tensor) -> torch.Tensor: + """ + Abstract method to be implemented by subclasses that defines the forward pass. + + :param inputs: A torch.Tensor containing the input to the model. + :return: A torch.Tensor containing the model's output. + """ + raise NotImplementedError("Subclasses must implement this method") + + def set_model(self, model: nn.Module): + """ + Set the model for this wrapper. + + :param model: An instance of nn.Module to be used by this wrapper. + """ + self.model = model diff --git a/src/super_gradients/training/forward_wrappers/sliding_window_detection_forward_wrapper.py b/src/super_gradients/training/forward_wrappers/sliding_window_detection_forward_wrapper.py index 33ab3632d7..004692cc10 100644 --- a/src/super_gradients/training/forward_wrappers/sliding_window_detection_forward_wrapper.py +++ b/src/super_gradients/training/forward_wrappers/sliding_window_detection_forward_wrapper.py @@ -29,7 +29,7 @@ def __init__( self, tile_size: int, tile_step: int, - model: Optional[CustomizableDetector], + model: Optional[CustomizableDetector] = None, min_tile_threshold=30, tile_nms_iou: float = 0.7, tile_nms_conf: float = 0.5, @@ -53,21 +53,26 @@ def __init__( super().__init__() self.model = model - self.sliding_window_post_prediction_callback = self.get_post_prediction_callback( - iou=tile_nms_iou, - conf=tile_nms_conf, - nms_top_k=tile_nms_top_k, - max_predictions=tile_nms_max_predictions, - multi_label_per_box=tile_nms_multi_label_per_box, - class_agnostic_nms=tile_nms_class_agnostic_nms, - ) self.tile_size = tile_size self.tile_step = tile_step self.min_tile_threshold = min_tile_threshold # Processing params - self._class_names: Optional[List[str]] = self.model.get_class_names() - self._image_processor: Optional[Processing] = self.model.get_processing_params() + if 
self.model is not None: + self._class_names: Optional[List[str]] = self.model.get_class_names() + self._image_processor: Optional[Processing] = self.model.get_processing_params() + self.sliding_window_post_prediction_callback = self.get_post_prediction_callback( + iou=tile_nms_iou, + conf=tile_nms_conf, + nms_top_k=tile_nms_top_k, + max_predictions=tile_nms_max_predictions, + multi_label_per_box=tile_nms_multi_label_per_box, + class_agnostic_nms=tile_nms_class_agnostic_nms, + ) + else: + self._class_names: Optional[List[str]] = None + self._image_processor: Optional[Processing] = None + self.sliding_window_post_prediction_callback = None self._default_nms_iou: float = tile_nms_iou self._default_nms_conf: float = tile_nms_conf self._default_nms_top_k: int = tile_nms_top_k @@ -75,14 +80,28 @@ def __init__( self._default_multi_label_per_box = tile_nms_multi_label_per_box self._default_class_agnostic_nms = tile_nms_class_agnostic_nms - def __call__(self, inputs: torch.Tensor, model: nn.Module = None, sliding_window_post_prediction_callback: DetectionPostPredictionCallback = None): - model = model or self.model - sliding_window_post_prediction_callback = sliding_window_post_prediction_callback or self.sliding_window_post_prediction_callback + def __call__(self, inputs: torch.Tensor, model: CustomizableDetector = None): + if model is not None: + sliding_window_post_prediction_callback = model.get_post_prediction_callback( + conf=self._default_nms_conf, + iou=self._default_nms_iou, + nms_top_k=self._default_nms_top_k, + max_predictions=self._default_max_predictions, + multi_label_per_box=self._default_multi_label_per_box, + class_agnostic_nms=self._default_class_agnostic_nms, + ) + else: + model = self.model + sliding_window_post_prediction_callback = self.sliding_window_post_prediction_callback + if None in [model, sliding_window_post_prediction_callback]: - raise RuntimeError("model and sliding_window_post_prediction_callback must be passed when model is not " "passed 
in __init__.") + raise RuntimeError("model must be passed explicitly if not passed in __init__ ") + + return self.forward_with_explicit_model_and_post_prediction_callback(inputs, model, sliding_window_post_prediction_callback) + + def forward_with_explicit_model_and_post_prediction_callback(self, inputs, model, sliding_window_post_prediction_callback): batch_size, _, _, _ = inputs.shape all_detections = [[] for _ in range(batch_size)] # Create a list for each image in the batch - # Generate and process each tile for img_idx in range(batch_size): single_image = inputs[img_idx : img_idx + 1] # Extract each image @@ -96,7 +115,6 @@ def __call__(self, inputs: torch.Tensor, model: nn.Module = None, sliding_window if len(img_i_tile_detections) > 0: img_i_tile_detections[:, :4] += torch.tensor([start_x, start_y, start_x, start_y], device=tile.device) all_detections[img_idx].append(img_i_tile_detections) - # Concatenate and apply global NMS for each image's detections final_detections = [] for detections in all_detections: @@ -115,6 +133,8 @@ def __call__(self, inputs: torch.Tensor, model: nn.Module = None, sliding_window final_detections.append(torch.empty(0, 6).to(inputs.device)) # Empty tensor for images with no detections return final_detections + # def forward_with_explicit_post_prediction_callback(self, ): + def _generate_tiles(self, image, tile_size, tile_step): _, _, h, w = image.shape tiles = [] @@ -153,7 +173,7 @@ def get_post_prediction_callback( If False NMS is performed separately for each class. 
:return: """ - if self.model: + if self.model is not None: return self.model.get_post_prediction_callback( conf=conf, iou=iou, diff --git a/src/super_gradients/training/metrics/detection_metrics.py b/src/super_gradients/training/metrics/detection_metrics.py index fd99451e91..7c51215fb3 100755 --- a/src/super_gradients/training/metrics/detection_metrics.py +++ b/src/super_gradients/training/metrics/detection_metrics.py @@ -33,6 +33,8 @@ class DetectionMetrics(Metric): :param num_cls: Number of classes. :param post_prediction_callback: DetectionPostPredictionCallback to be applied on net's output prior to the metric computation (NMS). + When None, the direct outputs of the model will be used. + :param normalize_targets: Whether to normalize bbox coordinates by image size. :param iou_thres: IoU threshold to compute the mAP. Could be either instance of IouThreshold, a tuple (lower bound, upper_bound) or single scalar. diff --git a/tests/unit_tests/detection_sliding_window_wrapper_test.py b/tests/unit_tests/detection_sliding_window_wrapper_test.py index a74ae6aeb7..fe8431ec29 100644 --- a/tests/unit_tests/detection_sliding_window_wrapper_test.py +++ b/tests/unit_tests/detection_sliding_window_wrapper_test.py @@ -1,9 +1,53 @@ import unittest +from pathlib import Path +from super_gradients.training import models +from super_gradients.training.dataloaders import coco2017_val_yolo_nas +from super_gradients.training import Trainer +from super_gradients.training.forward_wrappers.sliding_window_detection_forward_wrapper import SlidingWindowInferenceDetectionWrapper +from super_gradients.training.metrics import DetectionMetrics +from super_gradients.training import training_hyperparams -class TestSlidingForwardWrapper(unittest.TestCase): - def test_something(self): - self.assertEqual(True, False) + +class SlidingWindowWrapperTest(unittest.TestCase): + def setUp(self): + self.mini_coco_data_dir = str(Path(__file__).parent.parent / "data" / "tinycoco") + + def 
test_train_with_sliding_window_wrapper_validation(self): + train_params = training_hyperparams.get("coco2017_yolo_nas_s") + + train_params["valid_metrics_list"] = [ + DetectionMetrics( + normalize_targets=True, + post_prediction_callback=None, + num_cls=80, + ) + ] + train_params["max_epochs"] = 2 + train_params["lr_warmup_epochs"] = 0 + train_params["lr_cooldown_epochs"] = 0 + train_params["average_best_models"] = False + train_params["mixed_precision"] = False + train_params["validation_forward_wrapper"] = SlidingWindowInferenceDetectionWrapper(tile_size=320, tile_step=160, tile_nms_iou=0.65, tile_nms_conf=0.03) + + dl = coco2017_val_yolo_nas(dataset_params=dict(data_dir=self.mini_coco_data_dir)) + + trainer = Trainer("test_yolo_nas_s_coco_with_sliding_window") + model = models.get("yolo_nas_s", num_classes=80, pretrained_weights="coco") + trainer.train(model=model, training_params=train_params, train_loader=dl, valid_loader=dl) + + def test_yolo_nas_s_coco_with_sliding_window(self): + trainer = Trainer("test_yolo_nas_s_coco_with_sliding_window") + model = models.get("yolo_nas_s", num_classes=80, pretrained_weights="coco") + model = SlidingWindowInferenceDetectionWrapper(tile_size=320, tile_step=160, model=model, tile_nms_iou=0.65, tile_nms_conf=0.03) + dl = coco2017_val_yolo_nas(dataset_params=dict(data_dir=self.mini_coco_data_dir)) + metric = DetectionMetrics( + normalize_targets=True, + post_prediction_callback=None, + num_cls=80, + ) + metric_values = trainer.test(model=model, test_loader=dl, test_metrics_list=[metric]) + self.assertAlmostEqual(metric_values[metric.map_str], 0.331, delta=0.001) if __name__ == "__main__": From ebfefd1fd4bc1186b09e0872d172a245745239f6 Mon Sep 17 00:00:00 2001 From: shayaharon Date: Thu, 16 May 2024 11:06:21 +0300 Subject: [PATCH 16/22] commit changes before removal of validation during training support --- .../abstract_forward_wrapper_model.py | 17 ----------------- .../sliding_window_detection_forward_wrapper.py | 13 
++++++++----- 2 files changed, 8 insertions(+), 22 deletions(-) diff --git a/src/super_gradients/training/forward_wrappers/abstract_forward_wrapper_model.py b/src/super_gradients/training/forward_wrappers/abstract_forward_wrapper_model.py index 851934ba20..4090532ff8 100644 --- a/src/super_gradients/training/forward_wrappers/abstract_forward_wrapper_model.py +++ b/src/super_gradients/training/forward_wrappers/abstract_forward_wrapper_model.py @@ -1,17 +1,8 @@ import abc import torch -from torch import nn class AbstractForwardWrapperModel(abc.ABC): - def __init__(self, model: nn.Module = None): - """ - Initialize the AbstractForwardWrapperModel with an optional PyTorch model. - - :param model: An instance of nn.Module to be wrapped by this class, default is None. - """ - self.model = model - @abc.abstractmethod def __call__(self, inputs: torch.Tensor) -> torch.Tensor: """ @@ -21,11 +12,3 @@ def __call__(self, inputs: torch.Tensor) -> torch.Tensor: :return: A torch.Tensor containing the model's output. """ raise NotImplementedError("Subclasses must implement this method") - - def set_model(self, model: nn.Module): - """ - Set the model for this wrapper. - - :param model: An instance of nn.Module to be used by this wrapper. 
- """ - self.model = model diff --git a/src/super_gradients/training/forward_wrappers/sliding_window_detection_forward_wrapper.py b/src/super_gradients/training/forward_wrappers/sliding_window_detection_forward_wrapper.py index 004692cc10..49d32189ef 100644 --- a/src/super_gradients/training/forward_wrappers/sliding_window_detection_forward_wrapper.py +++ b/src/super_gradients/training/forward_wrappers/sliding_window_detection_forward_wrapper.py @@ -51,6 +51,7 @@ def __init__( :param tile_nms_class_agnostic_nms: """ super().__init__() + self.model = model self.tile_size = tile_size @@ -133,8 +134,6 @@ def forward_with_explicit_model_and_post_prediction_callback(self, inputs, model final_detections.append(torch.empty(0, 6).to(inputs.device)) # Empty tensor for images with no detections return final_detections - # def forward_with_explicit_post_prediction_callback(self, ): - def _generate_tiles(self, image, tile_size, tile_step): _, _, h, w = image.shape tiles = [] @@ -183,7 +182,10 @@ def get_post_prediction_callback( class_agnostic_nms=class_agnostic_nms, ) else: - return None + raise RuntimeError( + "self.model must not be None before calling get_post_prediction_callback(). Pass " + "instantiated CustomizableDetector through the 'model' arg on __init__" + ) @resolve_param("image_processor", ProcessingFactory()) def set_dataset_processing_params( @@ -272,7 +274,9 @@ def _get_pipeline( """ if None in (self._class_names, self._image_processor, self._default_nms_iou, self._default_nms_conf): raise RuntimeError( - "You must set the dataset processing parameters before calling predict.\n" "Please call `model.set_dataset_processing_params(...)` first." + "You must set the dataset processing parameters before calling predict.\n" + "Please call " + "`model.set_dataset_processing_params(...)` first or do so on self.model. 
" ) iou = self._default_nms_iou if iou is None else iou @@ -368,7 +372,6 @@ def predict_webcam( :param iou: (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used. :param conf: (Optional) Below the confidence threshold, prediction are discarded. If None, the default value associated to the training is used. - :param batch_size: Maximum number of images to process at the same time. :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. :param skip_image_resizing: If True, the image processor will not resize the images. :param nms_top_k: (Optional) The maximum number of detections to consider for NMS. From aa7d0cb77d99918898270f45f6db4ae29f73ed1e Mon Sep 17 00:00:00 2001 From: shayaharon Date: Thu, 16 May 2024 16:03:50 +0300 Subject: [PATCH 17/22] refined docs --- .../factories/forward_wrappers_factory.py | 7 - .../common/registry/registry.py | 4 - .../sliding_sindow_detection_predict.py | 4 +- .../default_train_params.yaml | 2 - .../training/forward_wrappers/__init__.py | 0 .../abstract_forward_wrapper_model.py | 14 -- .../detection_models/customizable_detector.py | 12 ++ ...liding_window_detection_forward_wrapper.py | 140 ++++++++---------- src/super_gradients/training/params.py | 4 +- .../training/sg_trainer/sg_trainer.py | 10 +- .../detection_sliding_window_wrapper_test.py | 2 +- 11 files changed, 79 insertions(+), 120 deletions(-) delete mode 100644 src/super_gradients/common/factories/forward_wrappers_factory.py delete mode 100644 src/super_gradients/training/forward_wrappers/__init__.py delete mode 100644 src/super_gradients/training/forward_wrappers/abstract_forward_wrapper_model.py rename src/super_gradients/training/{forward_wrappers => models/detection_models}/sliding_window_detection_forward_wrapper.py (79%) diff --git a/src/super_gradients/common/factories/forward_wrappers_factory.py 
b/src/super_gradients/common/factories/forward_wrappers_factory.py deleted file mode 100644 index a03539a000..0000000000 --- a/src/super_gradients/common/factories/forward_wrappers_factory.py +++ /dev/null @@ -1,7 +0,0 @@ -from super_gradients.common.factories.base_factory import BaseFactory -from super_gradients.common.registry.registry import FORWARD_WRAPPERS - - -class ForwardWrappersFactory(BaseFactory): - def __init__(self): - super().__init__(FORWARD_WRAPPERS) diff --git a/src/super_gradients/common/registry/registry.py b/src/super_gradients/common/registry/registry.py index 2e4bde6f1a..e303f3766f 100644 --- a/src/super_gradients/common/registry/registry.py +++ b/src/super_gradients/common/registry/registry.py @@ -160,10 +160,6 @@ def warn_if_deprecated(name: str, registry: dict): ALL_COLLATE_FUNCTIONS = {} register_collate_function = create_register_decorator(registry=ALL_COLLATE_FUNCTIONS) -FORWARD_WRAPPERS = {} -register_forward_wrapper = create_register_decorator(registry=FORWARD_WRAPPERS) - - SAMPLERS = { Samplers.DISTRIBUTED: torch.utils.data.DistributedSampler, Samplers.SEQUENTIAL: torch.utils.data.SequentialSampler, diff --git a/src/super_gradients/examples/predict/sliding_sindow_detection_predict.py b/src/super_gradients/examples/predict/sliding_sindow_detection_predict.py index eee3daee57..12e53a7f48 100644 --- a/src/super_gradients/examples/predict/sliding_sindow_detection_predict.py +++ b/src/super_gradients/examples/predict/sliding_sindow_detection_predict.py @@ -4,14 +4,14 @@ # Note that currently only YoloX, PPYoloE and YOLO-NAS are supported. -from super_gradients.training.forward_wrappers.sliding_window_detection_forward_wrapper import SlidingWindowInferenceDetectionWrapper +from super_gradients.training.models.detection_models.sliding_window_detection_forward_wrapper import SlidingWindowInferenceDetectionWrapper model = models.get(Models.YOLO_NAS_S, pretrained_weights="coco") # We want to use cuda if available to speed up inference. 
model = model.to("cuda" if torch.cuda.is_available() else "cpu") -model = SlidingWindowInferenceDetectionWrapper(model) +model = SlidingWindowInferenceDetectionWrapper(model=model, tile_size=640, tile_step=160, tile_nms_conf=0.35) predictions = model.predict( "https://images.pexels.com/photos/7968254/pexels-photo-7968254.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", skip_image_resizing=True diff --git a/src/super_gradients/recipes/training_hyperparams/default_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/default_train_params.yaml index 2164c632e6..6b560df81c 100644 --- a/src/super_gradients/recipes/training_hyperparams/default_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/default_train_params.yaml @@ -109,8 +109,6 @@ max_train_batches: # For debug- when not None- will break out of inner train lo max_valid_batches: # For debug- when not None- will break out of inner valid loop # (i.e iterating over valid_loader) when reaching this number of batches. -validation_forward_wrapper: None # callable that expects - images: torch.Tensor, model: nn.Module and will replace the model's forward during validation. 
- sg_logger: base_sg_logger sg_logger_params: tb_files_user_prompt: False # Asks User for Tensorboard Deletion Prompt diff --git a/src/super_gradients/training/forward_wrappers/__init__.py b/src/super_gradients/training/forward_wrappers/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/src/super_gradients/training/forward_wrappers/abstract_forward_wrapper_model.py b/src/super_gradients/training/forward_wrappers/abstract_forward_wrapper_model.py deleted file mode 100644 index 4090532ff8..0000000000 --- a/src/super_gradients/training/forward_wrappers/abstract_forward_wrapper_model.py +++ /dev/null @@ -1,14 +0,0 @@ -import abc -import torch - - -class AbstractForwardWrapperModel(abc.ABC): - @abc.abstractmethod - def __call__(self, inputs: torch.Tensor) -> torch.Tensor: - """ - Abstract method to be implemented by subclasses that defines the forward pass. - - :param inputs: A torch.Tensor containing the input to the model. - :return: A torch.Tensor containing the model's output. 
- """ - raise NotImplementedError("Subclasses must implement this method") diff --git a/src/super_gradients/training/models/detection_models/customizable_detector.py b/src/super_gradients/training/models/detection_models/customizable_detector.py index bcd625a1e4..2af34699d5 100644 --- a/src/super_gradients/training/models/detection_models/customizable_detector.py +++ b/src/super_gradients/training/models/detection_models/customizable_detector.py @@ -194,6 +194,18 @@ def set_dataset_processing_params( if class_agnostic_nms is not None: self._default_class_agnostic_nms = bool(class_agnostic_nms) + def get_dataset_processing_params(self): + return dict( + class_names=self._class_names, + image_processor=self._image_processor, + iou=self._default_nms_iou, + conf=self._default_nms_iou, + nms_top_k=self._default_nms_top_k, + max_predictions=self._default_max_predictions, + multi_label_per_box=self._multi_label_per_box, + class_agnostic_nms=self._default_class_agnostic_nms, + ) + def get_processing_params(self) -> Optional[Processing]: return self._image_processor diff --git a/src/super_gradients/training/forward_wrappers/sliding_window_detection_forward_wrapper.py b/src/super_gradients/training/models/detection_models/sliding_window_detection_forward_wrapper.py similarity index 79% rename from src/super_gradients/training/forward_wrappers/sliding_window_detection_forward_wrapper.py rename to src/super_gradients/training/models/detection_models/sliding_window_detection_forward_wrapper.py index 49d32189ef..a0847c548c 100644 --- a/src/super_gradients/training/forward_wrappers/sliding_window_detection_forward_wrapper.py +++ b/src/super_gradients/training/models/detection_models/sliding_window_detection_forward_wrapper.py @@ -5,9 +5,7 @@ from torch import nn from super_gradients.common.decorators.factory_decorator import resolve_param from super_gradients.common.factories.processing_factory import ProcessingFactory -from super_gradients.common.registry.registry import 
register_forward_wrapper from super_gradients.module_interfaces import HasPredict -from super_gradients.training.forward_wrappers.abstract_forward_wrapper_model import AbstractForwardWrapperModel from super_gradients.training.models import CustomizableDetector from super_gradients.training.utils.predict import ImagesDetectionPrediction from super_gradients.training.pipelines.pipelines import SlidingWindowDetectionPipeline @@ -17,52 +15,62 @@ from super_gradients.training.utils.detection_utils import DetectionPostPredictionCallback -@register_forward_wrapper("SlidingWindowInferenceDetectionWrapper") -class SlidingWindowInferenceDetectionWrapper(HasPredict, AbstractForwardWrapperModel, nn.Module): +class SlidingWindowInferenceDetectionWrapper(HasPredict, nn.Module): """ - A customizable detector with backbone -> neck -> heads - Each submodule with its parameters must be defined explicitly. - Modules should follow the interface of BaseDetectionModule + Implements a sliding window inference wrapper for a customizable detector. + + Parameters: + tile_size (int): The size of each square tile (in pixels) used in the sliding window. + tile_step (int): The step size (in pixels) between consecutive tiles in the sliding window. + model (CustomizableDetector): The detection model to which the sliding window inference is applied. + min_tile_threshold (int): Minimum dimension size for edge tiles before padding is applied. + If the remainder of the image (after the full tiles have been applied) + is smaller than this threshold, it will not be processed. + tile_nms_iou (Optional[float]): IoU threshold for Non-Maximum Suppression (NMS) of bounding boxes. + Defaults to the model's internal setting if None. + tile_nms_conf (Optional[float]): Confidence threshold for predictions to consider in post-processing. + Defaults to the model's internal setting if None. + tile_nms_top_k (Optional[int]): Maximum number of top-scoring detections to consider for NMS in each tile. 
+ Defaults to the model's internal setting if None. + tile_nms_max_predictions (Optional[int]): Maximum number of detections to return from each tile. + Defaults to the model's internal setting if None. + tile_nms_multi_label_per_box (Optional[bool]): Allows multiple labels per box if True. Each anchor can produce + multiple labels of different classes that pass the confidence threshold. + Only the highest-scoring class is considered per anchor if False. + Defaults to the model's internal setting if None. + tile_nms_class_agnostic_nms (Optional[bool]): Performs class-agnostic NMS if True, where the IoU of boxes across + different classes is considered. Performs class-specific NMS if False. + Defaults to the model's internal setting if None. """ def __init__( self, tile_size: int, tile_step: int, - model: Optional[CustomizableDetector] = None, - min_tile_threshold=30, - tile_nms_iou: float = 0.7, - tile_nms_conf: float = 0.5, - tile_nms_top_k: int = 1024, - tile_nms_max_predictions=300, - tile_nms_multi_label_per_box=True, - tile_nms_class_agnostic_nms=False, + model: Optional[CustomizableDetector], + min_tile_threshold: int = 30, + tile_nms_iou: Optional[float] = None, + tile_nms_conf: Optional[float] = None, + tile_nms_top_k: Optional[int] = None, + tile_nms_max_predictions: Optional[int] = None, + tile_nms_multi_label_per_box: Optional[bool] = None, + tile_nms_class_agnostic_nms: Optional[bool] = None, ): - """ - :param tile_size: - :param tile_step: - :param min_tile_threshold: - :param tile_nms_iou: - :param tile_nms_conf: - :param tile_nms_top_k: - :param tile_nms_max_predictions: - :param tile_nms_multi_label_per_box: - :param tile_nms_class_agnostic_nms: - """ super().__init__() - - self.model = model - self.tile_size = tile_size self.tile_step = tile_step self.min_tile_threshold = min_tile_threshold # Processing params - if self.model is not None: - self._class_names: Optional[List[str]] = self.model.get_class_names() - self._image_processor: 
Optional[Processing] = self.model.get_processing_params() - self.sliding_window_post_prediction_callback = self.get_post_prediction_callback( + self.model = model + self.set_dataset_processing_params(**self.model.get_dataset_processing_params()) + + if any( + arg is not None + for arg in [tile_nms_iou, tile_nms_conf, tile_nms_top_k, tile_nms_max_predictions, tile_nms_multi_label_per_box, tile_nms_class_agnostic_nms] + ): + self.set_dataset_processing_params( iou=tile_nms_iou, conf=tile_nms_conf, nms_top_k=tile_nms_top_k, @@ -70,37 +78,19 @@ def __init__( multi_label_per_box=tile_nms_multi_label_per_box, class_agnostic_nms=tile_nms_class_agnostic_nms, ) - else: - self._class_names: Optional[List[str]] = None - self._image_processor: Optional[Processing] = None - self.sliding_window_post_prediction_callback = None - self._default_nms_iou: float = tile_nms_iou - self._default_nms_conf: float = tile_nms_conf - self._default_nms_top_k: int = tile_nms_top_k - self._default_max_predictions = tile_nms_max_predictions - self._default_multi_label_per_box = tile_nms_multi_label_per_box - self._default_class_agnostic_nms = tile_nms_class_agnostic_nms - - def __call__(self, inputs: torch.Tensor, model: CustomizableDetector = None): - if model is not None: - sliding_window_post_prediction_callback = model.get_post_prediction_callback( - conf=self._default_nms_conf, - iou=self._default_nms_iou, - nms_top_k=self._default_nms_top_k, - max_predictions=self._default_max_predictions, - multi_label_per_box=self._default_multi_label_per_box, - class_agnostic_nms=self._default_class_agnostic_nms, - ) - else: - model = self.model - sliding_window_post_prediction_callback = self.sliding_window_post_prediction_callback - if None in [model, sliding_window_post_prediction_callback]: - raise RuntimeError("model must be passed explicitly if not passed in __init__ ") + self.sliding_window_post_prediction_callback = self.get_post_prediction_callback( + iou=self._default_nms_iou, + 
conf=self._default_nms_conf, + nms_top_k=self._default_nms_top_k, + max_predictions=self._default_max_predictions, + multi_label_per_box=self._default_multi_label_per_box, + class_agnostic_nms=self._default_class_agnostic_nms, + ) - return self.forward_with_explicit_model_and_post_prediction_callback(inputs, model, sliding_window_post_prediction_callback) + def forward(self, inputs: torch.Tensor, sliding_window_post_prediction_callback: Optional[DetectionPostPredictionCallback] = None) -> List[torch.Tensor]: - def forward_with_explicit_model_and_post_prediction_callback(self, inputs, model, sliding_window_post_prediction_callback): + sliding_window_post_prediction_callback = sliding_window_post_prediction_callback or self.sliding_window_post_prediction_callback batch_size, _, _, _ = inputs.shape all_detections = [[] for _ in range(batch_size)] # Create a list for each image in the batch # Generate and process each tile @@ -108,7 +98,7 @@ def forward_with_explicit_model_and_post_prediction_callback(self, inputs, model single_image = inputs[img_idx : img_idx + 1] # Extract each image tiles = self._generate_tiles(single_image, self.tile_size, self.tile_step) for tile, (start_x, start_y) in tiles: - tile_detections = model(tile) + tile_detections = self.model(tile) # Apply local NMS using post_prediction_callback tile_detections = sliding_window_post_prediction_callback(tile_detections) # Adjust detections to global image coordinates @@ -164,28 +154,22 @@ def get_post_prediction_callback( :param conf: A minimum confidence threshold for predictions to be used in post-processing. :param iou: A IoU threshold for boxes non-maximum suppression. - :param nms_top_k: The maximum number of detections to consider for NMS. - :param max_predictions: The maximum number of detections to return. + :param nms_top_k: The maximum number of detections to consider for the NMS applied on each tile. + :param max_predictions: The maximum number of detections to return in each tile. 
:param multi_label_per_box: If True, each anchor can produce multiple labels of different classes. If False, each anchor can produce only one label of the class with the highest score. :param class_agnostic_nms: If True, perform class-agnostic NMS (i.e IoU of boxes of different classes is checked). If False NMS is performed separately for each class. :return: """ - if self.model is not None: - return self.model.get_post_prediction_callback( - conf=conf, - iou=iou, - nms_top_k=nms_top_k, - max_predictions=max_predictions, - multi_label_per_box=multi_label_per_box, - class_agnostic_nms=class_agnostic_nms, - ) - else: - raise RuntimeError( - "self.model must not be None before calling get_post_prediction_callback(). Pass " - "instantiated CustomizableDetector through the 'model' arg on __init__" - ) + return self.model.get_post_prediction_callback( + conf=conf, + iou=iou, + nms_top_k=nms_top_k, + max_predictions=max_predictions, + multi_label_per_box=multi_label_per_box, + class_agnostic_nms=class_agnostic_nms, + ) @resolve_param("image_processor", ProcessingFactory()) def set_dataset_processing_params( diff --git a/src/super_gradients/training/params.py b/src/super_gradients/training/params.py index 4df63abffa..30f3c90c03 100755 --- a/src/super_gradients/training/params.py +++ b/src/super_gradients/training/params.py @@ -81,10 +81,8 @@ "options": None, # A dictionary of options to pass to the backend. "disable": False, # Turn torch.compile() into a no-op for testing }, # torch.compile options from https://pytorch.org/docs/stable/generated/torch.compile.html - "finetune": False, # Whether to freeze a fixed part of the model (supported only for models that implement + "finetune": False # Whether to freeze a fixed part of the model (supported only for models that implement # get_finetune_lr_dict, see SgModule.get_finetune_lr_dict. Tailored for each model class.) 
- "validation_forward_wrapper": None # callable that expects - inputs: torch.Tensor, model: nn.Module and will - # replace the model's forward during validation. } DEFAULT_OPTIMIZER_PARAMS_SGD = {"weight_decay": 1e-4, "momentum": 0.9} diff --git a/src/super_gradients/training/sg_trainer/sg_trainer.py b/src/super_gradients/training/sg_trainer/sg_trainer.py index fcc037a28e..566228bc64 100755 --- a/src/super_gradients/training/sg_trainer/sg_trainer.py +++ b/src/super_gradients/training/sg_trainer/sg_trainer.py @@ -28,7 +28,6 @@ validate_run_id, get_checkpoints_dir_path, ) -from super_gradients.common.factories.forward_wrappers_factory import ForwardWrappersFactory from super_gradients.module_interfaces import HasPreprocessingParams, HasPredict from super_gradients.modules.repvgg_block import fuse_repvgg_blocks_residual_branches from super_gradients.import_utils import import_pytorch_quantization_or_install @@ -171,7 +170,6 @@ def __init__(self, experiment_name: str, device: Optional[str] = None, multi_gpu self.phase_callbacks = None self.checkpoint_params = None self.pre_prediction_callback = None - self.validation_forward_wrapper = None # SET THE DEFAULT PROPERTIES self.half_precision = False @@ -1420,8 +1418,6 @@ def get_finetune_lr_dict(self, lr: float) -> Dict[str, float]: self.pre_prediction_callback = CallbacksFactory().get(self.training_params.pre_prediction_callback) - self.validation_forward_wrapper = ForwardWrappersFactory().get(self.training_params.validation_forward_wrapper) - self.training_params.mixed_precision = self._initialize_mixed_precision(self.training_params.mixed_precision) self.ckpt_best_name = self.training_params.ckpt_best_name @@ -2290,11 +2286,7 @@ def evaluate( else: self.phase_callback_handler.on_test_batch_start(context) - if self.validation_forward_wrapper is not None: - output = self.validation_forward_wrapper(inputs=inputs, model=self.net) - else: - output = self.net(inputs) - + output = self.net(inputs) 
context.update_context(preds=output) if self.criterion is not None: diff --git a/tests/unit_tests/detection_sliding_window_wrapper_test.py b/tests/unit_tests/detection_sliding_window_wrapper_test.py index fe8431ec29..13fc6c9d9b 100644 --- a/tests/unit_tests/detection_sliding_window_wrapper_test.py +++ b/tests/unit_tests/detection_sliding_window_wrapper_test.py @@ -4,7 +4,7 @@ from super_gradients.training import models from super_gradients.training.dataloaders import coco2017_val_yolo_nas from super_gradients.training import Trainer -from super_gradients.training.forward_wrappers.sliding_window_detection_forward_wrapper import SlidingWindowInferenceDetectionWrapper +from super_gradients.training.models.detection_models.sliding_window_detection_forward_wrapper import SlidingWindowInferenceDetectionWrapper from super_gradients.training.metrics import DetectionMetrics from super_gradients.training import training_hyperparams From 7f3a0d402c586ef36522fade8da705db23470214 Mon Sep 17 00:00:00 2001 From: shayaharon Date: Mon, 20 May 2024 10:41:38 +0300 Subject: [PATCH 18/22] removed old test for forward wrapper, fixed defaults --- .../detection_models/customizable_detector.py | 2 +- ...liding_window_detection_forward_wrapper.py | 9 +++++++ .../detection_sliding_window_wrapper_test.py | 24 ------------------- 3 files changed, 10 insertions(+), 25 deletions(-) diff --git a/src/super_gradients/training/models/detection_models/customizable_detector.py b/src/super_gradients/training/models/detection_models/customizable_detector.py index 2af34699d5..92bfe9b23c 100644 --- a/src/super_gradients/training/models/detection_models/customizable_detector.py +++ b/src/super_gradients/training/models/detection_models/customizable_detector.py @@ -202,7 +202,7 @@ def get_dataset_processing_params(self): conf=self._default_nms_conf, nms_top_k=self._default_nms_top_k, max_predictions=self._default_max_predictions, - multi_label_per_box=self._multi_label_per_box, +
multi_label_per_box=self._default_multi_label_per_box, class_agnostic_nms=self._default_class_agnostic_nms, ) diff --git a/src/super_gradients/training/models/detection_models/sliding_window_detection_forward_wrapper.py b/src/super_gradients/training/models/detection_models/sliding_window_detection_forward_wrapper.py index a0847c548c..6189bff5e9 100644 --- a/src/super_gradients/training/models/detection_models/sliding_window_detection_forward_wrapper.py +++ b/src/super_gradients/training/models/detection_models/sliding_window_detection_forward_wrapper.py @@ -62,6 +62,15 @@ def __init__( self.tile_step = tile_step self.min_tile_threshold = min_tile_threshold + self._class_names: Optional[List[str]] = None + self._image_processor: Optional[Processing] = None + self._default_nms_iou: float = 0.7 + self._default_nms_conf: float = 0.5 + self._default_nms_top_k: int = 1024 + self._default_max_predictions = 300 + self._default_multi_label_per_box = True + self._default_class_agnostic_nms = False + # Processing params self.model = model self.set_dataset_processing_params(**self.model.get_dataset_processing_params()) diff --git a/tests/unit_tests/detection_sliding_window_wrapper_test.py b/tests/unit_tests/detection_sliding_window_wrapper_test.py index 13fc6c9d9b..10ac6b1d88 100644 --- a/tests/unit_tests/detection_sliding_window_wrapper_test.py +++ b/tests/unit_tests/detection_sliding_window_wrapper_test.py @@ -6,36 +6,12 @@ from super_gradients.training import Trainer from super_gradients.training.models.detection_models.sliding_window_detection_forward_wrapper import SlidingWindowInferenceDetectionWrapper from super_gradients.training.metrics import DetectionMetrics -from super_gradients.training import training_hyperparams class SlidingWindowWrapperTest(unittest.TestCase): def setUp(self): self.mini_coco_data_dir = str(Path(__file__).parent.parent / "data" / "tinycoco") - def test_train_with_sliding_window_wrapper_validation(self): - train_params = 
training_hyperparams.get("coco2017_yolo_nas_s") - - train_params["valid_metrics_list"] = [ - DetectionMetrics( - normalize_targets=True, - post_prediction_callback=None, - num_cls=80, - ) - ] - train_params["max_epochs"] = 2 - train_params["lr_warmup_epochs"] = 0 - train_params["lr_cooldown_epochs"] = 0 - train_params["average_best_models"] = False - train_params["mixed_precision"] = False - train_params["validation_forward_wrapper"] = SlidingWindowInferenceDetectionWrapper(tile_size=320, tile_step=160, tile_nms_iou=0.65, tile_nms_conf=0.03) - - dl = coco2017_val_yolo_nas(dataset_params=dict(data_dir=self.mini_coco_data_dir)) - - trainer = Trainer("test_yolo_nas_s_coco_with_sliding_window") - model = models.get("yolo_nas_s", num_classes=80, pretrained_weights="coco") - trainer.train(model=model, training_params=train_params, train_loader=dl, valid_loader=dl) - def test_yolo_nas_s_coco_with_sliding_window(self): trainer = Trainer("test_yolo_nas_s_coco_with_sliding_window") model = models.get("yolo_nas_s", num_classes=80, pretrained_weights="coco") From 1056b231f6576cbb607a6e2458d4617ce77242e9 Mon Sep 17 00:00:00 2001 From: shayaharon Date: Mon, 20 May 2024 10:49:32 +0300 Subject: [PATCH 19/22] fixed test and added clarifications --- ...liding_window_detection_forward_wrapper.py | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/super_gradients/training/models/detection_models/sliding_window_detection_forward_wrapper.py b/src/super_gradients/training/models/detection_models/sliding_window_detection_forward_wrapper.py index 6189bff5e9..d1b88cbf93 100644 --- a/src/super_gradients/training/models/detection_models/sliding_window_detection_forward_wrapper.py +++ b/src/super_gradients/training/models/detection_models/sliding_window_detection_forward_wrapper.py @@ -62,6 +62,7 @@ def __init__( self.tile_step = tile_step self.min_tile_threshold = min_tile_threshold + # GENERAL DEFAULTS self._class_names: Optional[List[str]] = None 
self._image_processor: Optional[Processing] = None self._default_nms_iou: float = 0.7 @@ -71,10 +72,11 @@ def __init__( self._default_multi_label_per_box = True self._default_class_agnostic_nms = False - # Processing params + # TAKE PROCESSING PARAMS FROM THE WRAPPED MODEL IF THEY ARE AVAILABLE, OTHERWISE USE THE GENERAL DEFAULTS self.model = model self.set_dataset_processing_params(**self.model.get_dataset_processing_params()) + # OVERRIDE WITH ANY EXPLICITLY PASSED PROCESSING PARAMS if any( arg is not None for arg in [tile_nms_iou, tile_nms_conf, tile_nms_top_k, tile_nms_max_predictions, tile_nms_multi_label_per_box, tile_nms_class_agnostic_nms] @@ -87,15 +89,16 @@ def __init__( multi_label_per_box=tile_nms_multi_label_per_box, class_agnostic_nms=tile_nms_class_agnostic_nms, ) + else: - self.sliding_window_post_prediction_callback = self.get_post_prediction_callback( - iou=self._default_nms_iou, - conf=self._default_nms_conf, - nms_top_k=self._default_nms_top_k, - max_predictions=self._default_max_predictions, - multi_label_per_box=self._default_multi_label_per_box, - class_agnostic_nms=self._default_class_agnostic_nms, - ) + self.sliding_window_post_prediction_callback = self.get_post_prediction_callback( + iou=self._default_nms_iou, + conf=self._default_nms_conf, + nms_top_k=self._default_nms_top_k, + max_predictions=self._default_max_predictions, + multi_label_per_box=self._default_multi_label_per_box, + class_agnostic_nms=self._default_class_agnostic_nms, + ) def forward(self, inputs: torch.Tensor, sliding_window_post_prediction_callback: Optional[DetectionPostPredictionCallback] = None) -> List[torch.Tensor]: From 2981c236e4d2e274782defaec1f1b9c387b59ace Mon Sep 17 00:00:00 2001 From: shayaharon Date: Mon, 20 May 2024 11:31:54 +0300 Subject: [PATCH 20/22] forward wrapper test removed --- tests/deci_core_unit_test_suite_runner.py | 2 - tests/unit_tests/__init__.py | 2 - tests/unit_tests/forward_wrapper_test.py | 97 ----------------------- 3 files changed, 101 
deletions(-) delete mode 100644 tests/unit_tests/forward_wrapper_test.py diff --git a/tests/deci_core_unit_test_suite_runner.py b/tests/deci_core_unit_test_suite_runner.py index fda112e5ee..27a92f1079 100644 --- a/tests/deci_core_unit_test_suite_runner.py +++ b/tests/deci_core_unit_test_suite_runner.py @@ -30,7 +30,6 @@ TestClassificationAdapter, TestDetectionAdapter, TestSegmentationAdapter, - TestForwardWrapper, ) from tests.end_to_end_tests import TestTrainer from tests.unit_tests.depth_estimation_dataset_test import DepthEstimationDatasetTest @@ -193,7 +192,6 @@ def _add_modules_to_unit_tests_suite(self): self.unit_tests_suite.addTest(self.test_loader.loadTestsFromModule(ClassBalancerTest)) self.unit_tests_suite.addTest(self.test_loader.loadTestsFromModule(ClassBalancedSamplerTest)) self.unit_tests_suite.addTest(self.test_loader.loadTestsFromModule(TestSegmentationModelExport)) - self.unit_tests_suite.addTest(self.test_loader.loadTestsFromModule(TestForwardWrapper)) def _add_modules_to_end_to_end_tests_suite(self): """ diff --git a/tests/unit_tests/__init__.py b/tests/unit_tests/__init__.py index 9a39e2f069..c43384006f 100644 --- a/tests/unit_tests/__init__.py +++ b/tests/unit_tests/__init__.py @@ -2,7 +2,6 @@ from tests.unit_tests.crash_tips_test import CrashTipTest from tests.unit_tests.double_training_test import CallTrainTwiceTest from tests.unit_tests.factories_test import FactoriesTest -from tests.unit_tests.forward_wrapper_test import TestForwardWrapper from tests.unit_tests.optimizer_params_override_test import TrainOptimizerParamsOverride from tests.unit_tests.resume_training_test import ResumeTrainingTest from tests.unit_tests.strictload_enum_test import StrictLoadEnumTest @@ -64,5 +63,4 @@ "TestClassificationAdapter", "TestDetectionAdapter", "TestSegmentationAdapter", - "TestForwardWrapper", ] diff --git a/tests/unit_tests/forward_wrapper_test.py b/tests/unit_tests/forward_wrapper_test.py deleted file mode 100644 index 106d48ca73..0000000000 --- 
a/tests/unit_tests/forward_wrapper_test.py +++ /dev/null @@ -1,97 +0,0 @@ -import unittest - -from super_gradients.training import Trainer -from super_gradients.training.dataloaders.dataloaders import classification_test_dataloader -from super_gradients.training.metrics import Accuracy -from super_gradients.training.models import LeNet -from super_gradients.training.utils.callbacks import PhaseContext, Callback -import torch - - -class OutputsCollectorCallback(Callback): - """ - Simple callback that collects validation and train outputs for testing - """ - - def __init__(self): - self.validation_outputs = [] - self.train_outputs = [] - - def on_validation_batch_end(self, context: PhaseContext) -> None: - self.validation_outputs.append(context.preds) - - def on_train_batch_end(self, context: PhaseContext) -> None: - self.train_outputs.append(context.preds) - - -class DummyForwardWrapper: - """ - Simple foward wrapper that ignores the model output and returns output of the same shape as the model filled with ones. - """ - - def __call__(self, inputs: torch.Tensor, model: torch.nn.Module): - return torch.ones_like(model(inputs)) - - -def compare_tensor_lists(list1, list2): - """ - Compares two lists of PyTorch tensors for element-wise equality, ensuring all operations are performed on the CPU. - - :param list1: The first list of tensors. Each element must be a PyTorch Tensor. - :param list2: The second list of tensors. Each element must be a PyTorch Tensor. - :return: True if all tensors in both lists are equal element-wise, False otherwise. - - Note: This function explicitly moves all tensors to the CPU before comparison, - making it suitable for environments where GPU resources are not desired or available. 
- """ - if len(list1) != len(list2): - return False - - # Move tensors to CPU - list1 = [t.cpu() for t in list1] - list2 = [t.cpu() for t in list2] - - for tensor1, tensor2 in zip(list1, list2): - if not torch.all(torch.eq(tensor1, tensor2)): - return False - return True - - -class TestForwardWrapper(unittest.TestCase): - def test_train_with_validation_forward_wrapper(self): - # Define Model - net = LeNet() - trainer = Trainer("test_train_with_validation_forward_wrapper") - output_collector = OutputsCollectorCallback() - validation_forward_wrapper = DummyForwardWrapper() - train_params = { - "max_epochs": 1, - "initial_lr": 0.1, - "loss": "CrossEntropyLoss", - "optimizer": "SGD", - "criterion_params": {}, - "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, - "train_metrics_list": [Accuracy()], - "valid_metrics_list": [Accuracy()], - "metric_to_watch": "Accuracy", - "greater_metric_to_watch_is_better": True, - "ema": False, - "phase_callbacks": [output_collector], - "warmup_mode": "LinearEpochLRWarmup", - "validation_forward_wrapper": validation_forward_wrapper, - "average_best_models": False, - } - - expected_outputs = [torch.ones(4, 10)] - trainer.train( - model=net, - training_params=train_params, - train_loader=classification_test_dataloader(batch_size=4), - valid_loader=classification_test_dataloader(batch_size=4), - ) - self.assertTrue(compare_tensor_lists(expected_outputs, output_collector.validation_outputs)) - self.assertFalse(compare_tensor_lists(expected_outputs, output_collector.train_outputs)) - - -if __name__ == "__main__": - unittest.main() From 0bcb821a17aa6a9e0499a52a16eb03160a10ecaf Mon Sep 17 00:00:00 2001 From: shayaharon Date: Mon, 20 May 2024 14:45:30 +0300 Subject: [PATCH 21/22] updated wrong threshold extraction and test result --- .../sliding_window_detection_forward_wrapper.py | 2 +- tests/unit_tests/detection_sliding_window_wrapper_test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/src/super_gradients/training/models/detection_models/sliding_window_detection_forward_wrapper.py b/src/super_gradients/training/models/detection_models/sliding_window_detection_forward_wrapper.py index d1b88cbf93..3b3f3e448c 100644 --- a/src/super_gradients/training/models/detection_models/sliding_window_detection_forward_wrapper.py +++ b/src/super_gradients/training/models/detection_models/sliding_window_detection_forward_wrapper.py @@ -128,7 +128,7 @@ def forward(self, inputs: torch.Tensor, sliding_window_post_prediction_callback: pred_cls_conf = detections[:, 4] pred_cls_label = detections[:, 5] idx_to_keep = torchvision.ops.boxes.batched_nms( - boxes=pred_bboxes, scores=pred_cls_conf, idxs=pred_cls_label, iou_threshold=sliding_window_post_prediction_callback.score_threshold + boxes=pred_bboxes, scores=pred_cls_conf, idxs=pred_cls_label, iou_threshold=sliding_window_post_prediction_callback.nms_threshold ) final_detections.append(detections[idx_to_keep]) diff --git a/tests/unit_tests/detection_sliding_window_wrapper_test.py b/tests/unit_tests/detection_sliding_window_wrapper_test.py index 10ac6b1d88..b4f26e55c9 100644 --- a/tests/unit_tests/detection_sliding_window_wrapper_test.py +++ b/tests/unit_tests/detection_sliding_window_wrapper_test.py @@ -23,7 +23,7 @@ def test_yolo_nas_s_coco_with_sliding_window(self): num_cls=80, ) metric_values = trainer.test(model=model, test_loader=dl, test_metrics_list=[metric]) - self.assertAlmostEqual(metric_values[metric.map_str], 0.331, delta=0.001) + self.assertAlmostEqual(metric_values[metric.map_str], 0.342, delta=0.001) if __name__ == "__main__": From 2d6331a24f0da1f494c01872b03d469d56339e42 Mon Sep 17 00:00:00 2001 From: shayaharon Date: Mon, 20 May 2024 15:25:43 +0300 Subject: [PATCH 22/22] fixed docstring format --- ...liding_window_detection_forward_wrapper.py | 41 +++++++++---------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git 
a/src/super_gradients/training/models/detection_models/sliding_window_detection_forward_wrapper.py b/src/super_gradients/training/models/detection_models/sliding_window_detection_forward_wrapper.py index 3b3f3e448c..e31b6a1bdf 100644 --- a/src/super_gradients/training/models/detection_models/sliding_window_detection_forward_wrapper.py +++ b/src/super_gradients/training/models/detection_models/sliding_window_detection_forward_wrapper.py @@ -19,28 +19,25 @@ class SlidingWindowInferenceDetectionWrapper(HasPredict, nn.Module): """ Implements a sliding window inference wrapper for a customizable detector. - Parameters: - tile_size (int): The size of each square tile (in pixels) used in the sliding window. - tile_step (int): The step size (in pixels) between consecutive tiles in the sliding window. - model (CustomizableDetector): The detection model to which the sliding window inference is applied. - min_tile_threshold (int): Minimum dimension size for edge tiles before padding is applied. - If the remainder of the image (after the full tiles have been applied) - is smaller than this threshold, it will not be processed. - tile_nms_iou (Optional[float]): IoU threshold for Non-Maximum Suppression (NMS) of bounding boxes. - Defaults to the model's internal setting if None. - tile_nms_conf (Optional[float]): Confidence threshold for predictions to consider in post-processing. - Defaults to the model's internal setting if None. - tile_nms_top_k (Optional[int]): Maximum number of top-scoring detections to consider for NMS in each tile. - Defaults to the model's internal setting if None. - tile_nms_max_predictions (Optional[int]): Maximum number of detections to return from each tile. - Defaults to the model's internal setting if None. - tile_nms_multi_label_per_box (Optional[bool]): Allows multiple labels per box if True. Each anchor can produce - multiple labels of different classes that pass the confidence threshold. 
- Only the highest-scoring class is considered per anchor if False. - Defaults to the model's internal setting if None. - tile_nms_class_agnostic_nms (Optional[bool]): Performs class-agnostic NMS if True, where the IoU of boxes across - different classes is considered. Performs class-specific NMS if False. - Defaults to the model's internal setting if None. + :param tile_size: (int) The size of each square tile (in pixels) used in the sliding window. + :param tile_step: (int) The step size (in pixels) between consecutive tiles in the sliding window. + :param model: (CustomizableDetector) The detection model to which the sliding window inference is applied. + :param min_tile_threshold: (int) Minimum dimension size for edge tiles before padding is applied. + If the remainder of the image (after the full tiles have been applied) is smaller than this threshold, + it will not be processed. + :param tile_nms_iou: (Optional[float]) IoU threshold for Non-Maximum Suppression (NMS) of bounding boxes. + Defaults to the model's internal setting if None. + :param tile_nms_conf: (Optional[float]) Confidence threshold for predictions to consider in post-processing. + Defaults to the model's internal setting if None. + :param tile_nms_top_k: (Optional[int]) Maximum number of top-scoring detections to consider for NMS in each tile. + Defaults to the model's internal setting if None. + :param tile_nms_max_predictions: (Optional[int]) Maximum number of detections to return from each tile. + Defaults to the model's internal setting if None. + :param tile_nms_multi_label_per_box: (Optional[bool]) Allows multiple labels per box if True. Each anchor can produce + multiple labels of different classes that pass the confidence threshold. Only the highest-scoring class is considered + per anchor if False. Defaults to the model's internal setting if None. 
+ :param tile_nms_class_agnostic_nms: (Optional[bool]) Performs class-agnostic NMS if True, where the IoU of boxes across + different classes is considered. Performs class-specific NMS if False. Defaults to the model's internal setting if None. """ def __init__(