diff --git a/.github/workflows/pre_merge.yaml b/.github/workflows/pre_merge.yaml index 201aaf089e3..87c901cf16a 100644 --- a/.github/workflows/pre_merge.yaml +++ b/.github/workflows/pre_merge.yaml @@ -153,3 +153,32 @@ jobs: rm /tmp/requirements.txt - name: Run Integration Test run: tox -vv -e integration-test-${{ matrix.task }} + Integration-Test-Large: + if: | + github.event.pull_request.draft == false && + !(startsWith(github.event.pull_request.title, '[WIP]')) + runs-on: [self-hosted, linux, x64, dev, dmount] + needs: Unit-Test + strategy: + fail-fast: false + matrix: + include: + - task: "detection" + - task: "instance_segmentation" + - task: "semantic_segmentation" + name: Integration-Test-Large-${{ matrix.task }}-py310 + steps: + - name: Checkout repository + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + - name: Install Python + uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + with: + python-version: "3.10" + - name: Install tox + run: | + python -m pip install --require-hashes --no-deps -r .ci/requirements.txt + pip-compile --generate-hashes --output-file=/tmp/requirements.txt --extra=ci_tox pyproject.toml + python -m pip install --require-hashes --no-deps -r /tmp/requirements.txt + rm /tmp/requirements.txt + - name: Run Integration Test + run: tox -vv -e integration-test-${{ matrix.task }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d0bfad4116..f6573d58f3c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,16 @@ All notable changes to this project will be documented in this file. () - Add GPU memory monitor hook () +- Add YOLOv9 model for Object Detection + (, ) +- Add OV inference for keypoint detection + () +- Add tiling for semantic segmentation + () +- Add 3D Object Detection task with MonoDETR model + () +- Add OpenVINO inference for 3D Object Detection task + () ### Enhancements @@ -29,29 +39,6 @@ All notable changes to this project will be documented in this file. 
() - Improve FMetric computation () - -### Bug fixes - -- Fix MaskDINO, MonoDETR recipes - () - -## \[2.3.0\] - -### New features - -- Add YOLOv9 model for Object Detection - (, ) -- Add OV inference for keypoint detection - () -- Add tiling for semantic segmentation - () -- Add 3D Object Detection task with MonoDETR model - () -- Add OpenVINO inference for 3D Object Detection task - () - -### Enhancements - - Upgrade OV, MAPI, and NNCF dependencies () - Instance Segmentation Model refactoring @@ -63,6 +50,8 @@ All notable changes to this project will be documented in this file. ### Bug fixes +- Fix MaskDINO, MonoDETR recipes + () - Fix a wrong HPO log () - Update model name in rotated detection recipes @@ -70,6 +59,35 @@ All notable changes to this project will be documented in this file. - Fix SupCon flag (https://github.com/openvinotoolkit/training_extensions/pull/4076) +## \[2.2.2\] + +### Bug fixes + +- BC improvement + () + +## \[2.2.1\] + +### Bug fixes + +- Fix empty annotation in tiling + () +- Fix patching early stopping in tools/converter.py, update headers in templates, change training schedule for classification + () +- Fix tensor type compatibility in dynamic soft label assigner and RTMDet head + () +- Fix DETR target class indices are of type long in loss calculations + () +- Fix arrow format reader for multiclass ROI case + () + +### Enhancements + +- Decouple DinoV2 for semantic segmentation task + () +- Update Label Info handling + () + ## \[2.2.0\] ### New features @@ -194,6 +212,8 @@ All notable changes to this project will be documented in this file. () - Disable tiling classifier toggle in configurable parameters () +- Fix Ellipse shapes for Instance Segmentation + () ## \[v2.1.0\] diff --git a/docs/source/guide/release_notes/index.rst b/docs/source/guide/release_notes/index.rst index e0b8dc86383..6a826256dcf 100644 --- a/docs/source/guide/release_notes/index.rst +++ b/docs/source/guide/release_notes/index.rst @@ -4,6 +4,35 @@ Releases .. 
toctree:: :maxdepth: 1 +v2.2.2 (2024.12) +---------------- + +Enhancements +^^^^^^^^^^^^ + +Bug fixes +^^^^^^^^^ + +- BC improvement + +v2.2.1 (2024.12) +---------------- + +Enhancements +^^^^^^^^^^^^ + +- Decouple DinoV2 for semantic segmentation task +- Update Label Info handling + +Bug fixes +^^^^^^^^^ + +- Fix empty annotation in tiling +- Fix patching early stopping in tools/converter.py, update headers in templates, change training schedule for classification +- Fix tensor type compatibility in dynamic soft label assigner and RTMDet head +- Fix DETR target class indices are of type long in loss calculations +- Fix arrow format reader for multiclass ROI case + v2.2.0 (2024.10) ---------------- @@ -22,6 +51,8 @@ New features - Add Semi-SL MeanTeacher algorithm for Semantic Segmentation - Update head and h-label format for hierarchical label classification - Support configurable input size +- Revert the old workaround for detection confidence threshold +- Add Keypoint Detection legacy template Enhancements ^^^^^^^^^^^^ @@ -40,21 +71,38 @@ Enhancements - Change sematic segmentation to consider bbox only annotations - Relieve memory usage criteria on batch size 2 during adaptive batch size - Remove background label from RT Info for segmentation task +- Enable export of the feature vectors for semantic segmentation task - Prevent using too low confidence thresholds in detection +- Update HPO interface +- Bump onnx to 1.17.0 to omit CVE-2024-5187 Bug fixes ^^^^^^^^^ +- Update anomaly base transforms to use square resizing - Fix Combined Dataloader & unlabeled warmup loss in Semi-SL - Revert #3579 to fix issues with replacing coco_instance with a different format in some dataset - Add num_devices in Engine for multi-gpu training - Add missing tile recipes and various tile recipe changes - Change categories mapping logic - Fix config converter for tiling +- Fix `BboxOverlaps2D` handling of empty ground-truth annotations in datasets. 
- Fix num_trials calculation on dataset length less than num_class - Fix out_features in HierarchicalCBAMClsHead - Fix multilabel_accuracy of MixedHLabelAccuracy - Fix wrong indices setting in HLabelInfo +- Add legacy template LiteHRNet_18 template +- Model templates: rename model_status value 'DISCONTINUED' to 'OBSOLETE' +- Update MRCNN model export to include feature vector and saliency map +- Upgrade MAPI in 2.2 +- Fix applying model's hparams when loading model from checkpoint +- Fix incorrect all_groups order configuration in HLabelInfo +- Fix RTDETR recipes +- Fix wrong model name in converter & template +- Fix RTMDet Inst Explain Mode +- Fix RTDETR Explain Mode +- Fix classification and semantic segmentation tasks, when ROI provided for images +- Disable tiling classifier toggle in configurable parameters v2.1.0 (2024.07) ---------------- diff --git a/src/otx/algo/callbacks/adaptive_early_stopping.py b/src/otx/algo/callbacks/adaptive_early_stopping.py index 0f95e3c277d..754c6cf7c36 100644 --- a/src/otx/algo/callbacks/adaptive_early_stopping.py +++ b/src/otx/algo/callbacks/adaptive_early_stopping.py @@ -20,7 +20,7 @@ def __init__( self, monitor: str, min_delta: float = 0.0, - patience: int = 3, + patience: int = 10, verbose: bool = False, mode: str = "min", strict: bool = True, diff --git a/src/otx/algo/classification/backbones/vision_transformer.py b/src/otx/algo/classification/backbones/vision_transformer.py index c60f2ded49e..1255abff0d1 100644 --- a/src/otx/algo/classification/backbones/vision_transformer.py +++ b/src/otx/algo/classification/backbones/vision_transformer.py @@ -5,6 +5,7 @@ """Copy from mmpretrain/models/backbones/vision_transformer.py.""" from __future__ import annotations +import math from functools import partial from typing import TYPE_CHECKING, Any, Callable, Literal @@ -46,6 +47,7 @@ "vit-huge", "dinov2-s", "dinov2-small", + "dinov2-small-seg", "dinov2-b", "dinov2-base", "dinov2-l", @@ -87,6 +89,7 @@ class 
VisionTransformer(BaseModule): norm_layer: Normalization layer. act_layer: MLP activation layer. block_fn: Transformer block layer. + interpolate_offset: work-around offset to apply when interpolating positional embeddings lora: Enable LoRA training. """ @@ -147,6 +150,17 @@ class VisionTransformer(BaseModule): "num_heads": 6, "reg_tokens": 4, "no_embed_class": True, + }, + ), + **dict.fromkeys( + ["dinov2-small-seg"], # segmentation + { + "patch_size": 14, + "embed_dim": 384, + "depth": 12, + "num_heads": 6, + "reg_tokens": 0, + "no_embed_class": False, "init_values": 1e-5, }, ), @@ -193,9 +207,9 @@ class VisionTransformer(BaseModule): def __init__( # noqa: PLR0913 self, - arch: VIT_ARCH_TYPE = "vit-base", + arch: VIT_ARCH_TYPE | str = "vit-base", img_size: int | tuple[int, int] = 224, - patch_size: int | tuple[int, int] | None = None, + patch_size: int | None = None, in_chans: int = 3, num_classes: int = 1000, embed_dim: int | None = None, @@ -221,6 +235,7 @@ def __init__( # noqa: PLR0913 mlp_layer: nn.Module | None = None, act_layer: LayerType | None = None, norm_layer: LayerType | None = None, + interpolate_offset: float = 0.1, lora: bool = False, ) -> None: super().__init__() @@ -231,7 +246,7 @@ def __init__( # noqa: PLR0913 arch_settings: dict[str, Any] = self.arch_zoo[arch] self.img_size: int | tuple[int, int] = img_size - self.patch_size: int | tuple[int, int] = patch_size or arch_settings.get("patch_size", 16) + self.patch_size: int = patch_size or arch_settings.get("patch_size", 16) self.embed_dim = embed_dim or arch_settings.get("embed_dim", 768) depth = depth or arch_settings.get("depth", 12) num_heads = num_heads or arch_settings.get("num_heads", 12) @@ -251,6 +266,7 @@ def __init__( # noqa: PLR0913 self.no_embed_class = no_embed_class # don't embed prefix positions (includes reg) self.dynamic_img_size = dynamic_img_size self.grad_checkpointing = False + self.interpolate_offset = interpolate_offset embed_args = {} if dynamic_img_size: @@ -353,15 
+369,17 @@ def resize_positional_embeddings(pos_embed: torch.Tensor, new_shape: tuple[int, # convert dinov2 pretrained weights state_dict = torch.load(checkpoint_path) state_dict.pop("mask_token", None) - state_dict["reg_token"] = state_dict.pop("register_tokens") + if "register_tokens" in state_dict: # DINOv2 checkpoints without registers have no such key + state_dict["reg_token"] = state_dict.pop("register_tokens") state_dict["cls_token"] = state_dict.pop("cls_token") + state_dict["pos_embed"][:, 0] img_size = (self.img_size, self.img_size) if isinstance(self.img_size, int) else self.img_size - patch_size = (self.patch_size, self.patch_size) if isinstance(self.patch_size, int) else self.patch_size - state_dict["pos_embed"] = resize_positional_embeddings( - state_dict.pop("pos_embed")[:, 1:], - (img_size[0] // patch_size[0], img_size[1] // patch_size[1]), - ) + patch_size = (self.patch_size, self.patch_size) + if state_dict["pos_embed"].shape != self.pos_embed.shape: + state_dict["pos_embed"] = resize_positional_embeddings( + state_dict.pop("pos_embed")[:, 1:], + (img_size[0] // patch_size[0], img_size[1] // patch_size[1]), + ) self.load_state_dict(state_dict, strict=False) else: msg = f"Unsupported `checkpoint_extension` {checkpoint_ext}, please choose from 'npz' or 'pth'." @@ -401,6 +419,137 @@ def _pos_embed(self, x: torch.Tensor) -> torch.Tensor: return self.pos_drop(x) + def interpolate_pos_encoding(self, x: torch.Tensor, w: int, h: int) -> torch.Tensor: + """Interpolates the positional encoding to match the input dimensions. + + Args: + x (torch.Tensor): Input tensor. + w (int): Width of the input image. + h (int): Height of the input image. + + Returns: + torch.Tensor: Tensor with interpolated positional encoding. 
+ """ + previous_dtype = x.dtype + npatch = x.shape[1] - 1 # patch tokens only: x already has the class token prepended + n = self.pos_embed.shape[1] - 1 # pos_embed[:, 0] is the class position, the rest are patches + if npatch == n and w == h: + return self.pos_embed + pos_embed = self.pos_embed.float() + class_pos_embed = pos_embed[:, 0] + patch_pos_embed = pos_embed[:, 1:] + dim = x.shape[-1] + w0 = w // self.patch_size + h0 = h // self.patch_size + m = int(math.sqrt(n)) # Recover the number of patches in each dimension + if m * m != n: + msg = f"Expected m * m to equal n, but got m={m}, n={n}" + raise ValueError(msg) + kwargs = {} + if self.interpolate_offset: + # fix float error by introducing small offset + sx = float(w0 + self.interpolate_offset) / m + sy = float(h0 + self.interpolate_offset) / m + kwargs["scale_factor"] = (sx, sy) + else: + # Simply specify an output size instead of a scale factor + kwargs["size"] = (w0, h0) + patch_pos_embed = nn.functional.interpolate( + patch_pos_embed.reshape(1, m, m, dim).permute(0, 3, 1, 2), + mode="bicubic", + **kwargs, + ) + patch_pos_embed = patch_pos_embed.permute(0, 2, 3, 1).view(1, -1, dim) + return torch.cat((class_pos_embed.unsqueeze(0), patch_pos_embed), dim=1).to(previous_dtype) + + def prepare_tokens_with_masks(self, x: torch.Tensor, masks: torch.Tensor | None = None) -> torch.Tensor: + """Prepare tokens with optional masks. + + Args: + x (torch.Tensor): Input tensor. + masks (torch.Tensor | None): Optional masks tensor. + + Returns: + torch.Tensor: Tensor with prepared tokens. 
+ """ + _, _, w, h = x.shape + x = self.patch_embed(x) + if masks is not None: + x = torch.where(masks.unsqueeze(-1), self.mask_token.to(x.dtype).unsqueeze(0), x) + + x = torch.cat((self.cls_token.expand(x.shape[0], -1, -1), x), dim=1) + x = x + self.interpolate_pos_encoding(x, w, h) + + if self.reg_token is not None: + x = torch.cat( + ( + x[:, :1], + self.reg_token.expand(x.shape[0], -1, -1), + x[:, 1:], + ), + dim=1, + ) + + return x + + def _get_intermediate_layers_not_chunked(self, x: torch.Tensor, n: int = 1) -> list[torch.Tensor]: + """Get intermediate layers without chunking. + + Args: + x (torch.Tensor): Input tensor. + n (int): Number of last blocks to take. If it's a list, take the specified blocks. + + Returns: + list[torch.Tensor]: List of intermediate layer outputs. + """ + x = self.prepare_tokens_with_masks(x) + # If n is an int, take the n last blocks. If it's a list, take them + output, total_block_len = [], len(self.blocks) + blocks_to_take = range(total_block_len - n, total_block_len) if isinstance(n, int) else n + for i, blk in enumerate(self.blocks): + x = blk(x) + if i in blocks_to_take: + output.append(x) + if len(output) != len(blocks_to_take): + msg = f"only {len(output)} / {len(blocks_to_take)} blocks found" + raise RuntimeError(msg) + return output + + def get_intermediate_layers( + self, + x: torch.Tensor, + n: int = 1, # Layers or n last layers to take + reshape: bool = False, + return_class_token: bool = False, + norm: bool = True, + ) -> tuple: + """Get intermediate layers of the VisionTransformer. + + Args: + x (torch.Tensor): Input tensor. + n (int): Number of last blocks to take. If it's a list, take the specified blocks. + reshape (bool): Whether to reshape the output feature maps. + return_class_token (bool): Whether to return the class token. + norm (bool): Whether to apply normalization to the outputs. + + Returns: + tuple: A tuple containing the intermediate layer outputs. 
+ """ + outputs = self._get_intermediate_layers_not_chunked(x, n) + if norm: + outputs = [self.norm(out) for out in outputs] + class_tokens = [out[:, 0] for out in outputs] + outputs = [out[:, 1 + self.num_reg_tokens :] for out in outputs] + if reshape: + b, _, w, h = x.shape + outputs = [ + out.reshape(b, w // self.patch_size, h // self.patch_size, -1).permute(0, 3, 1, 2).contiguous() + for out in outputs + ] + if return_class_token: + return tuple(zip(outputs, class_tokens)) + return tuple(outputs) + def forward( self, x: torch.Tensor, diff --git a/src/otx/algo/classification/efficientnet.py b/src/otx/algo/classification/efficientnet.py index d0ce1421b03..55a510aebec 100644 --- a/src/otx/algo/classification/efficientnet.py +++ b/src/otx/algo/classification/efficientnet.py @@ -14,7 +14,7 @@ from otx.algo.classification.backbones.efficientnet import EFFICIENTNET_VERSION, EfficientNetBackbone from otx.algo.classification.classifier import HLabelClassifier, ImageClassifier, SemiSLClassifier from otx.algo.classification.heads import ( - HierarchicalCBAMClsHead, + HierarchicalLinearClsHead, LinearClsHead, MultiLabelLinearClsHead, SemiSLLinearClsHead, @@ -272,11 +272,8 @@ def _build_model(self, head_config: dict) -> nn.Module: return HLabelClassifier( backbone=backbone, - neck=nn.Identity(), - head=HierarchicalCBAMClsHead( - in_channels=backbone.num_features, - **copied_head_config, - ), + neck=GlobalAveragePooling(dim=2), + head=HierarchicalLinearClsHead(**copied_head_config, in_channels=backbone.num_features), multiclass_loss=nn.CrossEntropyLoss(), multilabel_loss=AsymmetricAngularLossWithIgnore(gamma_pos=0.0, gamma_neg=1.0, reduction="sum"), ) diff --git a/src/otx/algo/classification/mobilenet_v3.py b/src/otx/algo/classification/mobilenet_v3.py index c0e83840aa5..e16c849dc00 100644 --- a/src/otx/algo/classification/mobilenet_v3.py +++ b/src/otx/algo/classification/mobilenet_v3.py @@ -314,6 +314,7 @@ def _build_model(self, head_config: dict) -> nn.Module: 
copied_head_config = copy(head_config) copied_head_config["step_size"] = (ceil(self.input_size[0] / 32), ceil(self.input_size[1] / 32)) + in_channels = 960 if self.mode == "large" else 576 backbone = MobileNetV3Backbone(mode=self.mode, input_size=self.input_size) return HLabelClassifier( diff --git a/src/otx/algo/classification/torchvision_model.py b/src/otx/algo/classification/torchvision_model.py index e5931d9ce7f..effed540a54 100644 --- a/src/otx/algo/classification/torchvision_model.py +++ b/src/otx/algo/classification/torchvision_model.py @@ -14,12 +14,12 @@ from otx.algo.classification.backbones.torchvision import TorchvisionBackbone, TVModelType from otx.algo.classification.classifier import HLabelClassifier, ImageClassifier, SemiSLClassifier from otx.algo.classification.heads import ( - HierarchicalCBAMClsHead, + HierarchicalLinearClsHead, LinearClsHead, MultiLabelLinearClsHead, SemiSLLinearClsHead, ) from otx.algo.classification.losses import AsymmetricAngularLossWithIgnore from otx.algo.classification.necks.gap import GlobalAveragePooling from otx.algo.classification.utils import get_classification_layers from otx.core.data.entity.classification import ( @@ -315,11 +315,8 @@ def _build_model(self, head_config: dict) -> nn.Module: backbone = TorchvisionBackbone(backbone=self.backbone, pretrained=self.pretrained) return HLabelClassifier( backbone=backbone, - neck=nn.Identity(), - head=HierarchicalCBAMClsHead( - in_channels=backbone.in_features, - **head_config, - ), + neck=GlobalAveragePooling(dim=2), + head=HierarchicalLinearClsHead(**head_config, in_channels=backbone.in_features), multiclass_loss=nn.CrossEntropyLoss(), multilabel_loss=AsymmetricAngularLossWithIgnore(gamma_pos=0.0, gamma_neg=1.0, reduction="sum"), ) diff --git a/src/otx/algo/classification/vit.py b/src/otx/algo/classification/vit.py index 55beb76deff..fd0ccc3c835 100644 --- a/src/otx/algo/classification/vit.py +++ 
b/src/otx/algo/classification/vit.py @@ -19,12 +19,12 @@ from otx.algo.classification.backbones.vision_transformer import VIT_ARCH_TYPE, VisionTransformer from otx.algo.classification.classifier import HLabelClassifier, ImageClassifier, SemiSLClassifier from otx.algo.classification.heads import ( - HierarchicalCBAMClsHead, + HierarchicalLinearClsHead, MultiLabelLinearClsHead, SemiSLVisionTransformerClsHead, VisionTransformerClsHead, ) from otx.algo.classification.losses import AsymmetricAngularLossWithIgnore from otx.algo.classification.utils import get_classification_layers from otx.algo.explain.explain_algo import ViTReciproCAM, feature_vector_fn from otx.algo.utils.support_otx_v1 import OTXv1Helper @@ -466,11 +466,7 @@ def _build_model(self, head_config: dict) -> nn.Module: return HLabelClassifier( backbone=vit_backbone, neck=None, - head=HierarchicalCBAMClsHead( - in_channels=vit_backbone.embed_dim, - step_size=1, - **head_config, - ), + head=HierarchicalLinearClsHead(**head_config, in_channels=vit_backbone.embed_dim), multiclass_loss=nn.CrossEntropyLoss(), multilabel_loss=AsymmetricAngularLossWithIgnore(gamma_pos=0.0, gamma_neg=1.0, reduction="sum"), init_cfg=init_cfg, diff --git a/src/otx/algo/common/utils/assigners/dynamic_soft_label_assigner.py b/src/otx/algo/common/utils/assigners/dynamic_soft_label_assigner.py index 4807e5b4a36..e12b1d1b678 100644 --- a/src/otx/algo/common/utils/assigners/dynamic_soft_label_assigner.py +++ b/src/otx/algo/common/utils/assigners/dynamic_soft_label_assigner.py @@ -196,7 +196,7 @@ def assign( assigned_labels = assigned_gt_inds.new_full((num_bboxes,), -1) assigned_labels[valid_mask] = gt_labels[matched_gt_inds].long() max_overlaps = assigned_gt_inds.new_full((num_bboxes,), -INF, dtype=torch.float32) - max_overlaps[valid_mask] = matched_pred_ious + max_overlaps[valid_mask] = matched_pred_ious.to(max_overlaps) return AssignResult(num_gt, assigned_gt_inds, max_overlaps, 
labels=assigned_labels) def dynamic_k_matching( diff --git a/src/otx/algo/detection/heads/rtmdet_head.py b/src/otx/algo/detection/heads/rtmdet_head.py index 2a04483dd09..a2ab8a95c82 100644 --- a/src/otx/algo/detection/heads/rtmdet_head.py +++ b/src/otx/algo/detection/heads/rtmdet_head.py @@ -491,7 +491,7 @@ def _get_targets_single( # type: ignore[override] if len(pos_inds) > 0: # point-based pos_bbox_targets = sampling_result.pos_gt_bboxes - bbox_targets[pos_inds, :] = pos_bbox_targets + bbox_targets[pos_inds, :] = pos_bbox_targets.to(bbox_targets) labels[pos_inds] = sampling_result.pos_gt_labels if self.train_cfg["pos_weight"] <= 0: diff --git a/src/otx/algo/detection/losses/rtdetr_loss.py b/src/otx/algo/detection/losses/rtdetr_loss.py index 361dfaa75c0..d71ca7989dd 100644 --- a/src/otx/algo/detection/losses/rtdetr_loss.py +++ b/src/otx/algo/detection/losses/rtdetr_loss.py @@ -77,7 +77,7 @@ def loss_labels_vfl( src_logits = outputs["pred_logits"] target_classes_o = torch.cat([t["labels"][J] for t, (_, J) in zip(targets, indices)]) target_classes = torch.full(src_logits.shape[:2], self.num_classes, dtype=torch.int64, device=src_logits.device) - target_classes[idx] = target_classes_o + target_classes[idx] = target_classes_o.long() target = nn.functional.one_hot(target_classes, num_classes=self.num_classes + 1)[..., :-1] target_score_o = torch.zeros_like(target_classes, dtype=src_logits.dtype) 
diff --git a/src/otx/algo/segmentation/backbones/__init__.py b/src/otx/algo/segmentation/backbones/__init__.py index 4c7a44cee9b..8b633cc21f8 100644 --- a/src/otx/algo/segmentation/backbones/__init__.py +++ b/src/otx/algo/segmentation/backbones/__init__.py @@ -3,8 +3,7 @@ # """Backbone modules for OTX segmentation model.""" -from .dinov2 import DinoVisionTransformer from .litehrnet import LiteHRNetBackbone from .mscan import MSCAN -__all__ = ["LiteHRNetBackbone", "DinoVisionTransformer", "MSCAN"] +__all__ = ["LiteHRNetBackbone", "MSCAN"] diff --git a/src/otx/algo/segmentation/backbones/dinov2.py b/src/otx/algo/segmentation/backbones/dinov2.py deleted file mode 100644 index ce1d605fe38..00000000000 --- a/src/otx/algo/segmentation/backbones/dinov2.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright (C) 2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# -"""DINO-V2 model for the OTX classification.""" - -from __future__ import annotations - -import logging -import os -from functools import partial -from pathlib import Path - -import torch -from torch import nn - -from otx.algo.utils.mmengine_utils import load_checkpoint_to_model, load_from_http -from otx.utils.utils import get_class_initial_arguments - -logger = logging.getLogger() - - -class DinoVisionTransformer(nn.Module): - """DINO-v2 Model.""" - - def __init__( - self, - model_name: str, - freeze_backbone: bool, - out_index: list[int], - pretrained_weights: str | None = None, - ): - super().__init__() - self._init_args = get_class_initial_arguments() - - ci_data_root = os.environ.get("CI_DATA_ROOT") - 
pretrained: bool = True - if ci_data_root is not None and Path(ci_data_root).exists(): - pretrained = False - - self.backbone = torch.hub.load(repo_or_dir="facebookresearch/dinov2", model=model_name, pretrained=pretrained) - - if ci_data_root is not None and Path(ci_data_root).exists(): - ckpt_filename = f"{model_name}4_pretrain.pth" - ckpt_path = Path(ci_data_root) / "torch" / "hub" / "checkpoints" / ckpt_filename - if not ckpt_path.exists(): - msg = ( - f"Internal cache was specified but cannot find weights file: {ckpt_filename}. load from torch hub." - ) - logger.warning(msg) - self.backbone = torch.hub.load(repo_or_dir="facebookresearch/dinov2", model=model_name, pretrained=True) - else: - self.backbone.load_state_dict(torch.load(ckpt_path)) - - if freeze_backbone: - self._freeze_backbone(self.backbone) - - # take intermediate layers to preserve spatial dimension - self.backbone.forward = partial( - self.backbone.get_intermediate_layers, - n=out_index, - reshape=True, - ) - - if pretrained_weights is not None: - self.load_pretrained_weights(pretrained_weights) - - def _freeze_backbone(self, backbone: nn.Module) -> None: - """Freeze the backbone.""" - for _, v in backbone.named_parameters(): - v.requires_grad = False - - def init_weights(self) -> None: - """Initialize the weights.""" - # restrict rewriting backbone pretrained weights from torch.hub - # unless weights passed explicitly in config - if self.init_cfg: - return super().init_weights() - return None - - def forward(self, imgs: torch.Tensor) -> torch.Tensor: - """Forward function.""" - return self.backbone(imgs) - - def load_pretrained_weights(self, pretrained: str | None = None, prefix: str = "") -> None: - """Initialize weights.""" - checkpoint = None - if isinstance(pretrained, str) and Path(pretrained).exists(): - checkpoint = torch.load(pretrained, "cpu") - print(f"init weight - {pretrained}") - elif pretrained is not None: - cache_dir = Path.home() / ".cache" / "torch" / "hub" / "checkpoints" - 
checkpoint = load_from_http(filename=pretrained, map_location="cpu", model_dir=cache_dir) - print(f"init weight - {pretrained}") - if checkpoint is not None: - load_checkpoint_to_model(self, checkpoint, prefix=prefix) - - def __reduce__(self): - return (DinoVisionTransformer, self._init_args) diff --git a/src/otx/algo/segmentation/dino_v2_seg.py b/src/otx/algo/segmentation/dino_v2_seg.py index cb89a472ed7..70ffe23c9a7 100644 --- a/src/otx/algo/segmentation/dino_v2_seg.py +++ b/src/otx/algo/segmentation/dino_v2_seg.py @@ -5,9 +5,14 @@ from __future__ import annotations +from functools import partial +from pathlib import Path from typing import TYPE_CHECKING, Any, ClassVar +from urllib.parse import urlparse -from otx.algo.segmentation.backbones import DinoVisionTransformer +from torch.hub import download_url_to_file + +from otx.algo.classification.backbones.vision_transformer import VisionTransformer from otx.algo.segmentation.heads import FCNHead from otx.algo.segmentation.losses import CrossEntropyLossWithIgnore from otx.algo.segmentation.segmentors import BaseSegmentationModel @@ -21,18 +26,41 @@ class DinoV2Seg(OTXSegmentationModel): """DinoV2Seg Model.""" AVAILABLE_MODEL_VERSIONS: ClassVar[list[str]] = [ - "dinov2_vits14", + "dinov2-small-seg", ] + PRETRAINED_WEIGHTS: ClassVar[dict[str, str]] = { + "dinov2-small-seg": "https://dl.fbaipublicfiles.com/dinov2/dinov2_vits14/dinov2_vits14_pretrain.pth", + } def _build_model(self) -> nn.Module: if self.model_name not in self.AVAILABLE_MODEL_VERSIONS: msg = f"Model version {self.model_name} is not supported." 
raise ValueError(msg) - - backbone = DinoVisionTransformer(model_name=self.model_name, freeze_backbone=True, out_index=[8, 9, 10, 11]) + backbone = VisionTransformer(arch=self.model_name, img_size=self.input_size) + backbone.forward = partial( # type: ignore[method-assign] + backbone.get_intermediate_layers, + n=[8, 9, 10, 11], + reshape=True, + ) decode_head = FCNHead(self.model_name, num_classes=self.num_classes) criterion = CrossEntropyLossWithIgnore(ignore_index=self.label_info.ignore_index) # type: ignore[attr-defined] + backbone.init_weights() + if self.model_name in self.PRETRAINED_WEIGHTS: + print(f"init weight - {self.PRETRAINED_WEIGHTS[self.model_name]}") + parts = urlparse(self.PRETRAINED_WEIGHTS[self.model_name]) + filename = Path(parts.path).name + + cache_dir = Path.home() / ".cache" / "torch" / "hub" / "checkpoints" + cache_file = cache_dir / filename + if not Path.exists(cache_file): + download_url_to_file(self.PRETRAINED_WEIGHTS[self.model_name], cache_file, "", progress=True) + backbone.load_pretrained(checkpoint_path=cache_file) + + # freeze backbone + for _, v in backbone.named_parameters(): + v.requires_grad = False + return BaseSegmentationModel( backbone=backbone, decode_head=decode_head, diff --git a/src/otx/algo/segmentation/heads/fcn_head.py b/src/otx/algo/segmentation/heads/fcn_head.py index 67b0fe6fc86..de877b2825a 100644 --- a/src/otx/algo/segmentation/heads/fcn_head.py +++ b/src/otx/algo/segmentation/heads/fcn_head.py @@ -216,7 +216,7 @@ class FCNHead: "aggregator_merge_norm": "None", "aggregator_use_concat": False, }, - "dinov2_vits14": { + "dinov2-small-seg": { "in_channels": [384, 384, 384, 384], "in_index": [0, 1, 2, 3], "input_transform": "resize_concat", @@ -233,7 +233,7 @@ def __new__(cls, model_name: str, num_classes: int) -> FCNHeadModule: normalization = ( partial(build_norm_layer, nn.SyncBatchNorm, requires_grad=True) - if model_name == "dinov2_vits14" + if model_name == "dinov2-small-seg" else partial(build_norm_layer, 
nn.BatchNorm2d, requires_grad=True) ) diff --git a/src/otx/core/data/dataset/action_classification.py b/src/otx/core/data/dataset/action_classification.py index 23391984423..4cfa4808487 100644 --- a/src/otx/core/data/dataset/action_classification.py +++ b/src/otx/core/data/dataset/action_classification.py @@ -37,6 +37,7 @@ def __init__( image_color_channel: ImageColorChannel = ImageColorChannel.BGR, stack_images: bool = True, to_tv_image: bool = True, + data_format: str = "", ) -> None: super().__init__( dm_subset, diff --git a/src/otx/core/data/dataset/anomaly.py b/src/otx/core/data/dataset/anomaly.py index 1a0149423c6..091e7b4066d 100644 --- a/src/otx/core/data/dataset/anomaly.py +++ b/src/otx/core/data/dataset/anomaly.py @@ -57,6 +57,7 @@ def __init__( image_color_channel: ImageColorChannel = ImageColorChannel.RGB, stack_images: bool = True, 
to_tv_image: bool = True, + data_format: str = "", ) -> None: self.task_type = task_type super().__init__( diff --git a/src/otx/core/data/dataset/base.py b/src/otx/core/data/dataset/base.py index 239a5ded307..5626690d6ca 100644 --- a/src/otx/core/data/dataset/base.py +++ b/src/otx/core/data/dataset/base.py @@ -70,6 +70,7 @@ class OTXDataset(Dataset, Generic[T_OTXDataEntity]): max_refetch: Maximum number of images to fetch in cache image_color_channel: Color channel of images stack_images: Whether or not to stack images in collate function in OTXBatchData entity. + data_format: Source data format, which was originally passed to datumaro (could be arrow for instance). """ @@ -83,6 +84,7 @@ def __init__( image_color_channel: ImageColorChannel = ImageColorChannel.RGB, stack_images: bool = True, to_tv_image: bool = True, + data_format: str = "", ) -> None: self.dm_subset = dm_subset self.transforms = transforms @@ -92,8 +94,11 @@ def __init__( self.image_color_channel = image_color_channel self.stack_images = stack_images self.to_tv_image = to_tv_image + self.data_format = data_format - if self.dm_subset.categories(): + if self.dm_subset.categories() and data_format == "arrow": + self.label_info = LabelInfo.from_dm_label_groups_arrow(self.dm_subset.categories()[AnnotationType.label]) + elif self.dm_subset.categories(): self.label_info = LabelInfo.from_dm_label_groups(self.dm_subset.categories()[AnnotationType.label]) else: self.label_info = NullLabelInfo() diff --git a/src/otx/core/data/dataset/classification.py b/src/otx/core/data/dataset/classification.py index 8f4f5ffc241..40b9c4a99fd 100644 --- a/src/otx/core/data/dataset/classification.py +++ b/src/otx/core/data/dataset/classification.py @@ -39,7 +39,10 @@ def _get_item_impl(self, index: int) -> MulticlassClsDataEntity | None: labels_ids = [ label["label"]["_id"] for label in roi["labels"] if label["label"]["domain"] == "CLASSIFICATION" ] - label_anns = [self.label_info.label_names.index(label_id) for label_id in 
labels_ids] + if self.data_format == "arrow": + label_anns = [self.label_info.label_ids.index(label_id) for label_id in labels_ids] + else: + label_anns = [self.label_info.label_names.index(label_id) for label_id in labels_ids] else: # extract labels from annotations label_anns = [ann.label for ann in item.annotations if isinstance(ann, Label)] @@ -80,17 +83,21 @@ def _get_item_impl(self, index: int) -> MultilabelClsDataEntity | None: ignored_labels: list[int] = [] # This should be assigned form item img_data, img_shape, _ = self._get_img_data_and_shape(img) - label_anns = [] + label_ids = set() for ann in item.annotations: + # multilabel information stored in 'multi_label_ids' attribute when the source format is arrow + if "multi_label_ids" in ann.attributes: + for lbl_idx in ann.attributes["multi_label_ids"]: + label_ids.add(lbl_idx) + if isinstance(ann, Label): - label_anns.append(ann) + label_ids.add(ann.label) else: # If the annotation is not Label, it should be converted to Label. # For Chained Task: Detection (Bbox) -> Classification (Label) label = Label(label=ann.label) - if label not in label_anns: - label_anns.append(label) - labels = torch.as_tensor([ann.label for ann in label_anns]) + label_ids.add(label.label) + labels = torch.as_tensor(list(label_ids)) entity = MultilabelClsDataEntity( image=img_data, @@ -128,13 +135,22 @@ def __init__(self, **kwargs) -> None: self.dm_categories = self.dm_subset.categories()[AnnotationType.label] # Hlabel classification used HLabelInfo to insert the HLabelData. 
- self.label_info = HLabelInfo.from_dm_label_groups(self.dm_categories) + if self.data_format == "arrow": + # arrow format stores label IDs as names, have to deal with that here + self.label_info = HLabelInfo.from_dm_label_groups_arrow(self.dm_categories) + else: + self.label_info = HLabelInfo.from_dm_label_groups(self.dm_categories) + + self.id_to_name_mapping = dict(zip(self.label_info.label_ids, self.label_info.label_names)) + self.id_to_name_mapping[""] = "" + if self.label_info.num_multiclass_heads == 0: msg = "The number of multiclass heads should be larger than 0." raise ValueError(msg) - for dm_item in self.dm_subset: - self._add_ancestors(dm_item.annotations) + if self.data_format != "arrow": + for dm_item in self.dm_subset: + self._add_ancestors(dm_item.annotations) def _add_ancestors(self, label_anns: list[Label]) -> None: """Add ancestors recursively if some label miss the ancestor information. @@ -149,7 +165,7 @@ def _add_ancestors(self, label_anns: list[Label]) -> None: """ def _label_idx_to_name(idx: int) -> str: - return self.label_info.label_names[idx] + return self.dm_categories[idx].name def _label_name_to_idx(name: str) -> int: indices = [idx for idx, val in enumerate(self.label_info.label_names) if val == name] @@ -157,6 +173,8 @@ def _label_name_to_idx(name: str) -> int: def _get_label_group_idx(label_name: str) -> int: if isinstance(self.label_info, HLabelInfo): + if self.data_format == "arrow": + return self.label_info.class_to_group_idx[self.id_to_name_mapping[label_name]][0] return self.label_info.class_to_group_idx[label_name][0] msg = f"self.label_info should have HLabelInfo type, got {type(self.label_info)}" raise ValueError(msg) @@ -197,17 +215,22 @@ def _get_item_impl(self, index: int) -> HlabelClsDataEntity | None: ignored_labels: list[int] = [] # This should be assigned form item img_data, img_shape, _ = self._get_img_data_and_shape(img) - label_anns = [] + label_ids = set() for ann in item.annotations: + # in h-cls scenario 
multilabel information stored in 'multi_label_ids' attribute + if "multi_label_ids" in ann.attributes: + for lbl_idx in ann.attributes["multi_label_ids"]: + label_ids.add(lbl_idx) + if isinstance(ann, Label): - label_anns.append(ann) + label_ids.add(ann.label) else: # If the annotation is not Label, it should be converted to Label. # For Chained Task: Detection (Bbox) -> Classification (Label) label = Label(label=ann.label) - if label not in label_anns: - label_anns.append(label) - hlabel_labels = self._convert_label_to_hlabel_format(label_anns, ignored_labels) + label_ids.add(label.label) + + hlabel_labels = self._convert_label_to_hlabel_format([Label(label=idx) for idx in label_ids], ignored_labels) entity = HlabelClsDataEntity( image=img_data, @@ -256,18 +279,18 @@ def _convert_label_to_hlabel_format(self, label_anns: list[Label], ignored_label class_indices[i] = -1 for ann in label_anns: - ann_name = self.dm_categories.items[ann.label].name - ann_parent = self.dm_categories.items[ann.label].parent + if self.data_format == "arrow": + # skips unknown labels for instance, the empty one + if self.dm_categories.items[ann.label].name not in self.id_to_name_mapping: + continue + ann_name = self.id_to_name_mapping[self.dm_categories.items[ann.label].name] + else: + ann_name = self.dm_categories.items[ann.label].name group_idx, in_group_idx = self.label_info.class_to_group_idx[ann_name] - (parent_group_idx, parent_in_group_idx) = ( - self.label_info.class_to_group_idx[ann_parent] if ann_parent else (None, None) - ) if group_idx < num_multiclass_heads: class_indices[group_idx] = in_group_idx - if parent_group_idx is not None and parent_in_group_idx is not None: - class_indices[parent_group_idx] = parent_in_group_idx - elif not ignored_labels or ann.label not in ignored_labels: + elif ann.label not in ignored_labels: class_indices[num_multiclass_heads + in_group_idx] = 1 else: class_indices[num_multiclass_heads + in_group_idx] = -1 diff --git 
a/src/otx/core/data/dataset/instance_segmentation.py b/src/otx/core/data/dataset/instance_segmentation.py index 27384a3df9d..e5ffef6086e 100644 --- a/src/otx/core/data/dataset/instance_segmentation.py +++ b/src/otx/core/data/dataset/instance_segmentation.py @@ -5,13 +5,15 @@ from __future__ import annotations +import warnings +from collections import defaultdict from functools import partial from typing import Callable import numpy as np import torch +from datumaro import Bbox, Ellipse, Image, Polygon from datumaro import Dataset as DmDataset -from datumaro import Image, Polygon from torchvision import tv_tensors from otx.core.data.entity.base import ImageInfo @@ -41,24 +43,53 @@ def _get_item_impl(self, index: int) -> InstanceSegDataEntity | None: img = item.media_as(Image) ignored_labels: list[int] = [] img_data, img_shape, _ = self._get_img_data_and_shape(img) +<<<<<<< HEAD +======= + + anno_collection: dict[str, list] = defaultdict(list) + for anno in item.annotations: + anno_collection[anno.__class__.__name__].append(anno) +>>>>>>> releases/2.2.0 gt_bboxes, gt_labels, gt_masks, gt_polygons = [], [], [], [] - for annotation in item.annotations: - if isinstance(annotation, Polygon): - bbox = np.array(annotation.get_bbox(), dtype=np.float32) + # TODO(Eugene): https://jira.devtools.intel.com/browse/CVS-159363 + # Temporary solution to handle multiple annotation types. + # Ideally, we should pre-filter annotations during initialization of the dataset. 
+ if Polygon.__name__ in anno_collection: # Polygon for InstSeg has higher priority + for poly in anno_collection[Polygon.__name__]: + bbox = Bbox(*poly.get_bbox()).points gt_bboxes.append(bbox) - gt_labels.append(annotation.label) + gt_labels.append(poly.label) if self.include_polygons: - gt_polygons.append(annotation) + gt_polygons.append(poly) else: - gt_masks.append(polygon_to_bitmap([annotation], *img_shape)[0]) - - # convert xywh to xyxy format - bboxes = np.array(gt_bboxes, dtype=np.float32) if gt_bboxes else np.empty((0, 4)) - bboxes[:, 2:] += bboxes[:, :2] + gt_masks.append(polygon_to_bitmap([poly], *img_shape)[0]) + elif Bbox.__name__ in anno_collection: + bboxes = anno_collection[Bbox.__name__] + gt_bboxes = [ann.points for ann in bboxes] + gt_labels = [ann.label for ann in bboxes] + for box in bboxes: + poly = Polygon(box.as_polygon()) + if self.include_polygons: + gt_polygons.append(poly) + else: + gt_masks.append(polygon_to_bitmap([poly], *img_shape)[0]) + elif Ellipse.__name__ in anno_collection: + for ellipse in anno_collection[Ellipse.__name__]: + bbox = Bbox(*ellipse.get_bbox()).points + gt_bboxes.append(bbox) + gt_labels.append(ellipse.label) + poly = Polygon(ellipse.as_polygon(num_points=10)) + if self.include_polygons: + gt_polygons.append(poly) + else: + gt_masks.append(polygon_to_bitmap([poly], *img_shape)[0]) + else: + warnings.warn(f"No valid annotations found for image {item.id}!", stacklevel=2) + bboxes = np.stack(gt_bboxes, dtype=np.float32, axis=0) if gt_bboxes else np.empty((0, 4)) masks = np.stack(gt_masks, axis=0) if gt_masks else np.zeros((0, *img_shape), dtype=bool) labels = np.array(gt_labels, dtype=np.int64) diff --git a/src/otx/core/data/dataset/keypoint_detection.py b/src/otx/core/data/dataset/keypoint_detection.py index c74b77c9319..bacb84bf643 100644 --- a/src/otx/core/data/dataset/keypoint_detection.py +++ b/src/otx/core/data/dataset/keypoint_detection.py @@ -54,9 +54,11 @@ def __init__( self.dm_subset = 
self._get_single_bbox_dataset(dm_subset) if self.dm_subset.categories(): + kp_labels = self.dm_subset.categories()[AnnotationType.points][0].labels self.label_info = LabelInfo( - label_names=self.dm_subset.categories()[AnnotationType.points][0].labels, + label_names=kp_labels, label_groups=[], + label_ids=[str(i) for i in range(len(kp_labels))], ) else: self.label_info = NullLabelInfo() diff --git a/src/otx/core/data/dataset/segmentation.py b/src/otx/core/data/dataset/segmentation.py index 0ab803b4f58..5672989e7fd 100644 --- a/src/otx/core/data/dataset/segmentation.py +++ b/src/otx/core/data/dataset/segmentation.py @@ -168,6 +168,7 @@ def __init__( stack_images: bool = True, to_tv_image: bool = True, ignore_index: int = 255, + data_format: str = "", ) -> None: super().__init__( dm_subset, @@ -188,6 +189,7 @@ def __init__( label_names=self.label_info.label_names, label_groups=self.label_info.label_groups, ignore_index=ignore_index, + label_ids=self.label_info.label_ids, ) self.ignore_index = ignore_index diff --git a/src/otx/core/data/dataset/tile.py b/src/otx/core/data/dataset/tile.py index d69c94b03e0..9de62a5c0c6 100644 --- a/src/otx/core/data/dataset/tile.py +++ b/src/otx/core/data/dataset/tile.py @@ -7,6 +7,8 @@ import logging as log import operator +import warnings +from collections import defaultdict from copy import deepcopy from itertools import product from typing import TYPE_CHECKING, Callable @@ -14,6 +16,7 @@ import numpy as np import shapely.geometry as sg import torch +from datumaro import Bbox, DatasetItem, Ellipse, Image, Polygon from datumaro import Dataset as DmDataset from datumaro import DatasetItem, Image from datumaro.components.annotation import AnnotationType, Bbox, ExtractedMask, Polygon @@ -96,7 +99,7 @@ def __init__( ) self._tile_size = tile_size self._tile_ann_func_map[AnnotationType.polygon] = OTXTileTransform._tile_polygon - self._tile_ann_func_map[AnnotationType.mask] = OTXTileTransform._tile_masks + 
self._tile_ann_func_map[AnnotationType.ellipse] = OTXTileTransform._tile_ellipse self.with_full_img = with_full_img @staticmethod @@ -138,27 +141,42 @@ def _tile_polygon( ) @staticmethod - def _tile_masks( - ann: ExtractedMask, - roi_int: BboxIntCoords, + def _tile_ellipse( + ann: Ellipse, + roi_box: sg.Polygon, + threshold_drop_ann: float = 0.8, *args, # noqa: ARG004 **kwargs, # noqa: ARG004 - ) -> ExtractedMask: - """Extracts a tile mask from the given annotation. + ) -> Polygon | None: + polygon = sg.Polygon(ann.get_points(num_points=10)) - Note: Original Datumaro _tile_masks does not work with ExtractedMask. + # NOTE: polygon may be invalid, e.g. self-intersecting + if not roi_box.intersects(polygon) or not polygon.is_valid: + return None - Args: - ann (ExtractedMask): datumaro ExtractedMask annotation. - roi_int (BboxIntCoords): ROI coordinates. + # NOTE: intersection may return a GeometryCollection or MultiPolygon + inter = polygon.intersection(roi_box) + if isinstance(inter, (sg.GeometryCollection, sg.MultiPolygon)): + shapes = [(geom, geom.area) for geom in list(inter.geoms) if geom.is_valid] + if not shapes: + return None - Returns: - ExtractedMask: ExtractedMask annotation. 
- """ - x, y, w, h = roi_int - return ann.wrap( - index_mask=ann.index_mask()[y : y + h, x : x + w], + inter, _ = max(shapes, key=operator.itemgetter(1)) + + if not isinstance(inter, sg.Polygon) and not inter.is_valid: + return None + + prop_area = inter.area / polygon.area + + if prop_area < threshold_drop_ann: + return None + + inter = _apply_offset(inter, roi_box) + + return Polygon( + points=[p for xy in inter.exterior.coords for p in xy], attributes=deepcopy(ann.attributes), + label=ann.label, ) def _extract_rois(self, image: Image) -> list[BboxIntCoords]: @@ -254,6 +272,7 @@ def __init__(self, dataset: OTXDataset, tile_config: TileConfig) -> None: dataset.image_color_channel, dataset.stack_images, dataset.to_tv_image, + data_format=dataset.data_format, ) self.tile_config = tile_config self._dataset = dataset @@ -416,14 +435,17 @@ def _get_item_impl(self, index: int) -> TileDetDataEntity: # type: ignore[overr img = item.media_as(Image) img_data, img_shape, _ = self._get_img_data_and_shape(img) - bbox_anns = [ann for ann in item.annotations if isinstance(ann, Bbox)] + gt_bboxes = [ann for ann in item.annotations if isinstance(ann, Bbox)] + + if empty_anno := len(gt_bboxes) == 0: + warnings.warn(f"Empty annotation for image {item.id}!", stacklevel=2) bboxes = ( - np.stack([ann.points for ann in bbox_anns], axis=0).astype(np.float32) - if len(bbox_anns) > 0 - else np.zeros((0, 4), dtype=np.float32) + np.empty((0, 4), dtype=np.float32) + if empty_anno + else np.stack([ann.points for ann in gt_bboxes], axis=0).astype(np.float32) ) - labels = torch.as_tensor([ann.label for ann in bbox_anns]) + labels = torch.as_tensor([ann.label for ann in gt_bboxes]) tile_entities, tile_attrs = self.get_tiles(img_data, item, index) @@ -507,24 +529,51 @@ def _get_item_impl(self, index: int) -> TileInstSegDataEntity: # type: ignore[o img = item.media_as(Image) img_data, img_shape, _ = self._get_img_data_and_shape(img) + anno_collection: dict[str, list] = defaultdict(list) + for anno 
in item.annotations: + anno_collection[anno.__class__.__name__].append(anno) + gt_bboxes, gt_labels, gt_masks, gt_polygons = [], [], [], [] - for annotation in item.annotations: - if isinstance(annotation, Polygon): - bbox = np.array(annotation.get_bbox(), dtype=np.float32) + # TODO(Eugene): https://jira.devtools.intel.com/browse/CVS-159363 + # Temporary solution to handle multiple annotation types. + # Ideally, we should pre-filter annotations during initialization of the dataset. + + if Polygon.__name__ in anno_collection: # Polygon for InstSeg has higher priority + for poly in anno_collection[Polygon.__name__]: + bbox = Bbox(*poly.get_bbox()).points gt_bboxes.append(bbox) - gt_labels.append(annotation.label) + gt_labels.append(poly.label) if self._dataset.include_polygons: - gt_polygons.append(annotation) + gt_polygons.append(poly) else: - gt_masks.append(polygon_to_bitmap([annotation], *img_shape)[0]) - - # convert xywh to xyxy format - bboxes = np.array(gt_bboxes, dtype=np.float32) - bboxes[:, 2:] += bboxes[:, :2] + gt_masks.append(polygon_to_bitmap([poly], *img_shape)[0]) + elif Bbox.__name__ in anno_collection: + boxes = anno_collection[Bbox.__name__] + gt_bboxes = [ann.points for ann in boxes] + gt_labels = [ann.label for ann in boxes] + for box in boxes: + poly = Polygon(box.as_polygon()) + if self._dataset.include_polygons: + gt_polygons.append(poly) + else: + gt_masks.append(polygon_to_bitmap([poly], *img_shape)[0]) + elif Ellipse.__name__ in anno_collection: + for ellipse in anno_collection[Ellipse.__name__]: + bbox = Bbox(*ellipse.get_bbox()).points + gt_bboxes.append(bbox) + gt_labels.append(ellipse.label) + poly = Polygon(ellipse.as_polygon(num_points=10)) + if self._dataset.include_polygons: + gt_polygons.append(poly) + else: + gt_masks.append(polygon_to_bitmap([poly], *img_shape)[0]) + else: + warnings.warn(f"No valid annotations found for image {item.id}!", stacklevel=2) - masks = np.stack(gt_masks, axis=0) if gt_masks else np.zeros((0, 
*img_shape), dtype=bool) + bboxes = np.stack(gt_bboxes, dtype=np.float32) if gt_bboxes else np.empty((0, 4), dtype=np.float32) + masks = np.stack(gt_masks, axis=0) if gt_masks else np.empty((0, *img_shape), dtype=bool) labels = np.array(gt_labels, dtype=np.int64) tile_entities, tile_attrs = self.get_tiles(img_data, item, index) diff --git a/src/otx/core/data/factory.py b/src/otx/core/data/factory.py index fd731109269..1f1e500b0fb 100644 --- a/src/otx/core/data/factory.py +++ b/src/otx/core/data/factory.py @@ -73,6 +73,7 @@ def create( # noqa: PLR0911 dm_subset: DmDataset, cfg_subset: SubsetConfig, mem_cache_handler: MemCacheHandlerBase, + data_format: str, mem_cache_img_max_size: tuple[int, int] | None = None, image_color_channel: ImageColorChannel = ImageColorChannel.RGB, stack_images: bool = True, @@ -85,6 +86,7 @@ def create( # noqa: PLR0911 common_kwargs = { "dm_subset": dm_subset, "transforms": transforms, + "data_format": data_format, "mem_cache_handler": mem_cache_handler, "mem_cache_img_max_size": mem_cache_img_max_size, "image_color_channel": image_color_channel, diff --git a/src/otx/core/data/module.py b/src/otx/core/data/module.py index f9b7cac8fd4..6449a07c270 100644 --- a/src/otx/core/data/module.py +++ b/src/otx/core/data/module.py @@ -107,13 +107,6 @@ def __init__( # noqa: PLR0913 self.subsets: dict[str, OTXDataset] = {} self.save_hyperparameters(ignore=["input_size"]) - # TODO (Jaeguk): This is workaround for a bug in Datumaro. - # These lines should be removed after next datumaro release. 
- # https://github.com/openvinotoolkit/datumaro/pull/1223/files - from datumaro.plugins.data_formats.video import VIDEO_EXTENSIONS - - VIDEO_EXTENSIONS.append(".mp4") - dataset = DmDataset.import_from(self.data_root, format=self.data_format) if self.task != "H_LABEL_CLS": dataset = pre_filtering( @@ -195,6 +188,7 @@ def __init__( # noqa: PLR0913 dm_subset=dm_subset.as_dataset(), cfg_subset=config_mapping[name], mem_cache_handler=mem_cache_handler, + data_format=self.data_format, mem_cache_img_max_size=mem_cache_img_max_size, image_color_channel=image_color_channel, stack_images=stack_images, @@ -238,6 +232,7 @@ def __init__( # noqa: PLR0913 include_polygons=include_polygons, ignore_index=ignore_index, vpm_config=vpm_config, + data_format=self.data_format, ) self.subsets[transform_key] = unlabeled_dataset else: @@ -252,6 +247,7 @@ def __init__( # noqa: PLR0913 include_polygons=include_polygons, ignore_index=ignore_index, vpm_config=vpm_config, + data_format=self.data_format, ) self.subsets[name] = unlabeled_dataset diff --git a/src/otx/core/data/pre_filtering.py b/src/otx/core/data/pre_filtering.py index 13fc08c7ebc..90487367d17 100644 --- a/src/otx/core/data/pre_filtering.py +++ b/src/otx/core/data/pre_filtering.py @@ -88,7 +88,7 @@ def remove_unused_labels( used_labels = [0, *used_labels] if data_format == "common_semantic_segmentation_with_subset_dirs" and len(original_categories) < len(used_labels): msg = ( - "There are labeles mismatch in dataset categories and actuall categories comes from semantic masks." + "There are labels mismatch in dataset categories and actual categories comes from semantic masks." "Please, check `dataset_meta.json` file." 
) raise ValueError(msg) diff --git a/src/otx/core/exporter/exportable_code/demo/requirements.txt b/src/otx/core/exporter/exportable_code/demo/requirements.txt index 9e4e68ff919..1a7acacf660 100644 --- a/src/otx/core/exporter/exportable_code/demo/requirements.txt +++ b/src/otx/core/exporter/exportable_code/demo/requirements.txt @@ -1,3 +1,7 @@ +<<<<<<< HEAD openvino==2024.5.0 +======= +openvino==2024.3.0 +>>>>>>> releases/2.2.0 openvino-model-api==0.2.5 numpy==1.26.4 diff --git a/src/otx/core/metrics/pck.py b/src/otx/core/metrics/pck.py index 966d56d841f..e69bcfd26fb 100644 --- a/src/otx/core/metrics/pck.py +++ b/src/otx/core/metrics/pck.py @@ -186,6 +186,7 @@ def compute(self) -> dict: gt_kpts_processed.append(p[0]) gt_kpts = np.stack(gt_kpts_processed) +<<<<<<< HEAD kpts_visible = [] for p in self.targets: if len(p[1].shape) == 3 and p[1].shape[0] == 1: @@ -196,6 +197,9 @@ def compute(self) -> dict: kpts_visible_stacked = np.stack(kpts_visible) normalize = np.tile(np.array([self.input_size[::-1]]), (pred_kpts.shape[0], 1)) +======= + normalize = np.tile(np.array([self.input_size]), (pred_kpts.shape[0], 1)) +>>>>>>> releases/2.2.0 _, avg_acc, _ = keypoint_pck_accuracy( pred_kpts, gt_kpts, diff --git a/src/otx/core/model/anomaly.py b/src/otx/core/model/anomaly.py index 4354386a442..7b96adc8a35 100644 --- a/src/otx/core/model/anomaly.py +++ b/src/otx/core/model/anomaly.py @@ -4,7 +4,11 @@ from __future__ import annotations +<<<<<<< HEAD from typing import TYPE_CHECKING, Any, Sequence, TypeAlias +======= +from typing import TYPE_CHECKING, Any, TypeAlias +>>>>>>> releases/2.2.0 import torch from anomalib import TaskType as AnomalibTaskType @@ -45,6 +49,10 @@ from torch.optim.optimizer import Optimizer from torchmetrics import Metric +<<<<<<< HEAD +======= + from otx.core.types.label import LabelInfoTypes +>>>>>>> releases/2.2.0 AnomalyModelInputs: TypeAlias = ( AnomalyClassificationDataBatch | AnomalySegmentationDataBatch | AnomalyDetectionDataBatch @@ -62,8 +70,13 @@ 
class OTXAnomaly(OTXModel): Model input size in the order of height and width. Defaults to None. """ +<<<<<<< HEAD def __init__(self) -> None: super().__init__(label_info=AnomalyLabelInfo(), input_size=self.input_size) +======= + def __init__(self, label_info: LabelInfoTypes, input_size: tuple[int, int]) -> None: + super().__init__(label_info=label_info, input_size=input_size) +>>>>>>> releases/2.2.0 self.optimizer: list[OptimizerCallable] | OptimizerCallable = None self.scheduler: list[LRSchedulerCallable] | LRSchedulerCallable = None self.trainer: Trainer diff --git a/src/otx/core/model/base.py b/src/otx/core/model/base.py index a48325ca98c..a190a5f1bc6 100644 --- a/src/otx/core/model/base.py +++ b/src/otx/core/model/base.py @@ -404,6 +404,11 @@ def load_state_dict_incrementally(self, ckpt: dict[str, Any], *args, **kwargs) - msg = "Checkpoint should have `label_info`." raise ValueError(msg, ckpt_label_info) + if not hasattr(ckpt_label_info, "label_ids"): + msg = "Loading checkpoint from OTX < 2.2.1, label_ids are assigned automatically" + logger.info(msg) + ckpt_label_info.label_ids = [str(i) for i, _ in enumerate(ckpt_label_info.label_names)] + if ckpt_label_info != self.label_info: msg = ( "Load model state dictionary incrementally: " @@ -757,7 +762,7 @@ def lr_scheduler_step(self, scheduler: LRSchedulerTypeUnion, metric: Tensor) -> return super().lr_scheduler_step(scheduler=scheduler, metric=metric) if len(warmup_schedulers) != 1: - msg = "No more than two warmup schedulers coexist." + msg = "No more than one warmup schedulers coexist." 
raise RuntimeError(msg) warmup_scheduler = next(iter(warmup_schedulers)) @@ -822,7 +827,11 @@ def _dispatch_label_info(label_info: LabelInfoTypes) -> LabelInfo: if isinstance(label_info, int): return LabelInfo.from_num_classes(num_classes=label_info) if isinstance(label_info, Sequence) and all(isinstance(name, str) for name in label_info): - return LabelInfo(label_names=label_info, label_groups=[label_info]) + return LabelInfo( + label_names=label_info, + label_groups=[label_info], + label_ids=[str(i) for i in range(len(label_info))], + ) if isinstance(label_info, LabelInfo): return label_info @@ -1115,7 +1124,7 @@ def _create_label_info_from_ov_ir(self) -> LabelInfo: ) logger.warning(msg) - return LabelInfo(label_names=label_names, label_groups=[label_names]) + return LabelInfo(label_names=label_names, label_groups=[label_names], label_ids=[]) msg = "Cannot construct LabelInfo from OpenVINO IR. Please check this model is trained by OTX." raise ValueError(msg) diff --git a/src/otx/core/model/segmentation.py b/src/otx/core/model/segmentation.py index a22cc15fbc4..eeebba408fe 100644 --- a/src/otx/core/model/segmentation.py +++ b/src/otx/core/model/segmentation.py @@ -93,7 +93,6 @@ def __init__( self.unsupervised_weight = unsupervised_weight self.semisl_start_epoch = semisl_start_epoch self.drop_unreliable_pixels_percent = drop_unreliable_pixels_percent - super().__init__( label_info=label_info, input_size=input_size, @@ -254,7 +253,11 @@ def _dispatch_label_info(label_info: LabelInfoTypes) -> LabelInfo: if isinstance(label_info, int): return SegLabelInfo.from_num_classes(num_classes=label_info) if isinstance(label_info, Sequence) and all(isinstance(name, str) for name in label_info): - return SegLabelInfo(label_names=label_info, label_groups=[label_info]) + return SegLabelInfo( + label_names=label_info, + label_groups=[label_info], + label_ids=[str(i) for i in range(len(label_info))], + ) if isinstance(label_info, SegLabelInfo): return label_info diff --git 
a/src/otx/core/schedulers/warmup_schedulers.py b/src/otx/core/schedulers/warmup_schedulers.py index 6de763bb52b..0b1d12a711e 100644 --- a/src/otx/core/schedulers/warmup_schedulers.py +++ b/src/otx/core/schedulers/warmup_schedulers.py @@ -19,8 +19,9 @@ class LinearWarmupScheduler(LambdaLR): """Linear Warmup scheduler. Args: - num_warmup_steps: Learning rate will linearly increased during the period same as this number. - warmup_interval: If "epoch", count the number of steps for the warmup period. + optimizer (Optimizer): Optimizer to apply the scheduler. + num_warmup_steps (int): Learning rate will linearly increased during the period same as this number. + interval (Literal["step", "epoch"]): If "epoch", count the number of epochs for the warmup period. Otherwise, the iteration step will be the warmup period. """ @@ -55,7 +56,7 @@ class LinearWarmupSchedulerCallable: main_scheduler_callable: Callable to create a LR scheduler that will be mainly used. num_warmup_steps: Learning rate will linearly increased during the period same as this number. If it is less than equal to zero, do not create `LinearWarmupScheduler`. - warmup_interval: If "epoch", count the number of steps for the warmup period. + warmup_interval: If "epoch", count the number of epochs for the warmup period. Otherwise, the iteration step will be the warmup period. monitor: If given, override the main scheduler's `monitor` attribute. 
""" diff --git a/src/otx/core/types/export.py b/src/otx/core/types/export.py index cc9c592f3b9..875c0651287 100644 --- a/src/otx/core/types/export.py +++ b/src/otx/core/types/export.py @@ -9,6 +9,7 @@ from dataclasses import dataclass, fields from enum import Enum +import otx from otx.core.config.data import TileConfig from otx.core.types.label import HLabelInfo, LabelInfo @@ -112,6 +113,7 @@ def to_metadata(self) -> dict[tuple[str, str], str]: ("model_info", "labels"): all_labels.strip(), ("model_info", "label_ids"): all_label_ids.strip(), ("model_info", "optimization_config"): json.dumps(self.optimization_config), + ("model_info", "otx_version"): otx.__version__, } if isinstance(self.label_info, HLabelInfo): diff --git a/src/otx/core/types/label.py b/src/otx/core/types/label.py index c89f67d7fd6..19c3ece3bb4 100644 --- a/src/otx/core/types/label.py +++ b/src/otx/core/types/label.py @@ -5,10 +5,13 @@ from __future__ import annotations +import copy import json from dataclasses import asdict, dataclass from typing import TYPE_CHECKING, Any +from datumaro.components.annotation import GroupType + if TYPE_CHECKING: from datumaro import Label, LabelCategories @@ -27,6 +30,7 @@ class LabelInfo: """Object to represent label information.""" label_names: list[str] + label_ids: list[str] label_groups: list[list[str]] @property @@ -51,10 +55,12 @@ def from_num_classes(cls, num_classes: int) -> LabelInfo: return NullLabelInfo() label_names = [f"label_{idx}" for idx in range(num_classes)] + label_ids = [str(i) for i in range(num_classes)] return cls( label_names=label_names, label_groups=[label_names], + label_ids=label_ids, ) @classmethod @@ -79,6 +85,38 @@ def from_dm_label_groups(cls, dm_label_categories: LabelCategories) -> LabelInfo return LabelInfo( label_names=label_names, label_groups=label_groups, + label_ids=[str(i) for i in range(len(label_names))], + ) + + @classmethod + def from_dm_label_groups_arrow(cls, dm_label_categories: LabelCategories) -> LabelInfo: + 
"""Overload to support datumaro's arrow format.""" + label_names = [] + for item in dm_label_categories.items: + for attr in item.attributes: + if attr.startswith("__name__"): + label_names.append(attr[len("__name__") :]) + break + + if len(label_names) != len(dm_label_categories.items): + msg = "Wrong arrow format: can not extract label names from attributes" + raise ValueError(msg) + + id_to_name_mapping = {item.name: label_names[i] for i, item in enumerate(dm_label_categories.items)} + + for label_group in dm_label_categories.label_groups: + label_group.labels = [id_to_name_mapping.get(label, label) for label in label_group.labels] + + label_groups = [label_group.labels for label_group in dm_label_categories.label_groups] + if len(label_groups) == 0: # Single-label classification + label_groups = [label_names] + + label_ids = [item.name for item in dm_label_categories.items] + + return LabelInfo( + label_names=label_names, + label_groups=label_groups, + label_ids=label_ids, ) def as_dict(self) -> dict[str, Any]: @@ -279,8 +317,60 @@ def convert_labels_if_needed( label_to_idx=label_to_idx, label_tree_edges=get_label_tree_edges(dm_label_categories.items), empty_multiclass_head_indices=[], # consider the label removing case + label_ids=[str(i) for i in range(len(label_names))], ) + @classmethod + def from_dm_label_groups_arrow(cls, dm_label_categories: LabelCategories) -> HLabelInfo: + """Generate HLabelData from the Datumaro LabelCategories. Arrow-specific implementation. + + Args: + dm_label_categories (LabelCategories): the label categories of datumaro. 
+ """ + dm_label_categories = copy.deepcopy(dm_label_categories) + + empty_label_name = None + for label_group in dm_label_categories.label_groups: + if label_group.group_type == GroupType.RESTRICTED: + empty_label_name = label_group.labels[0] + + dm_label_categories.label_groups = [ + group for group in dm_label_categories.label_groups if group.group_type != GroupType.RESTRICTED + ] + + empty_label_id = None + label_names = [] + for item in dm_label_categories.items: + for attr in item.attributes: + if attr.startswith("__name__"): + name = attr[len("__name__") :] + if name == empty_label_name: + empty_label_id = item.name + label_names.append(name) + break + + if len(label_names) != len(dm_label_categories.items): + msg = "Wrong arrow file: can not extract label names from attributes" + raise ValueError(msg) + + if empty_label_name is not None: + label_names.remove(empty_label_name) + dm_label_categories.items = [item for item in dm_label_categories.items if item.name != empty_label_id] + label_ids = [item.name for item in dm_label_categories.items] + + id_to_name_mapping = {item.name: label_names[i] for i, item in enumerate(dm_label_categories.items)} + + for i, item in enumerate(dm_label_categories.items): + item.name = label_names[i] + item.parent = id_to_name_mapping.get(item.parent, item.parent) + + for label_group in dm_label_categories.label_groups: + label_group.labels = [id_to_name_mapping.get(label, label) for label in label_group.labels] + + obj = cls.from_dm_label_groups(dm_label_categories) + obj.label_ids = label_ids + return obj + def as_head_config_dict(self) -> dict[str, Any]: """Return a dictionary including params needed to configure the HLabel MMPretrained head network.""" return { @@ -326,7 +416,7 @@ def from_num_classes(cls, num_classes: int) -> LabelInfo: if num_classes == 1: # binary segmentation label_names = ["background", "label_0"] - return SegLabelInfo(label_names=label_names, label_groups=[label_names]) + return 
SegLabelInfo(label_names=label_names, label_groups=[label_names], label_ids=["0", "1"]) return super().from_num_classes(num_classes) @@ -336,7 +426,7 @@ class NullLabelInfo(LabelInfo): """Represent no label information. It is used for Visual Prompting tasks.""" def __init__(self) -> None: - super().__init__(label_names=[], label_groups=[[]]) + super().__init__(label_names=[], label_groups=[[]], label_ids=[]) @classmethod def from_json(cls, _: str) -> LabelInfo: @@ -349,7 +439,7 @@ class AnomalyLabelInfo(LabelInfo): """Represent no label information. It is used for Anomaly tasks.""" def __init__(self) -> None: - super().__init__(label_names=["Normal", "Anomaly"], label_groups=[["Normal", "Anomaly"]]) + super().__init__(label_names=["Normal", "Anomaly"], label_groups=[["Normal", "Anomaly"]], label_ids=["0", "1"]) # Dispatching rules: diff --git a/src/otx/recipe/_base_/train.yaml b/src/otx/recipe/_base_/train.yaml index 7dba87f8381..806d09e3e0a 100644 --- a/src/otx/recipe/_base_/train.yaml +++ b/src/otx/recipe/_base_/train.yaml @@ -40,6 +40,8 @@ callbacks: init_args: max_interval: 5 decay: -0.025 + min_earlystop_patience: 5 + min_lrschedule_patience: 3 logger: - class_path: lightning.pytorch.loggers.csv_logs.CSVLogger init_args: diff --git a/src/otx/recipe/anomaly_classification/stfpm.yaml b/src/otx/recipe/anomaly_classification/stfpm.yaml index ec1c6af8ddc..91cf676c201 100644 --- a/src/otx/recipe/anomaly_classification/stfpm.yaml +++ b/src/otx/recipe/anomaly_classification/stfpm.yaml @@ -16,7 +16,7 @@ overrides: precision: 32 max_epochs: 100 callbacks: - - class_path: lightning.pytorch.callbacks.EarlyStopping + - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: patience: 5 mode: max diff --git a/src/otx/recipe/anomaly_detection/stfpm.yaml b/src/otx/recipe/anomaly_detection/stfpm.yaml index b13534505a4..25bb7be88bb 100644 --- a/src/otx/recipe/anomaly_detection/stfpm.yaml +++ b/src/otx/recipe/anomaly_detection/stfpm.yaml @@ 
-21,7 +21,7 @@ overrides: precision: 32 max_epochs: 100 callbacks: - - class_path: lightning.pytorch.callbacks.EarlyStopping + - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: patience: 5 mode: max diff --git a/src/otx/recipe/anomaly_segmentation/stfpm.yaml b/src/otx/recipe/anomaly_segmentation/stfpm.yaml index 9a3d9c85d6e..604ff9ba029 100644 --- a/src/otx/recipe/anomaly_segmentation/stfpm.yaml +++ b/src/otx/recipe/anomaly_segmentation/stfpm.yaml @@ -16,7 +16,7 @@ overrides: precision: 32 max_epochs: 100 callbacks: - - class_path: lightning.pytorch.callbacks.EarlyStopping + - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: patience: 5 mode: max diff --git a/src/otx/recipe/classification/h_label_cls/deit_tiny.yaml b/src/otx/recipe/classification/h_label_cls/deit_tiny.yaml index b36f48e14c9..1191e0e22d3 100644 --- a/src/otx/recipe/classification/h_label_cls/deit_tiny.yaml +++ b/src/otx/recipe/classification/h_label_cls/deit_tiny.yaml @@ -10,12 +10,16 @@ model: weight_decay: 0.05 scheduler: - class_path: lightning.pytorch.cli.ReduceLROnPlateau + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + num_warmup_steps: 0 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 3 + monitor: val/accuracy engine: task: H_LABEL_CLS @@ -26,11 +30,12 @@ callback_monitor: val/accuracy data: ../../_base_/data/classification.yaml overrides: max_epochs: 90 - callbacks: - - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup - init_args: - patience: 3 data: task: H_LABEL_CLS data_format: datumaro + + callbacks: + - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup + init_args: + patience: 5 diff --git a/src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml 
b/src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml index 4bfbe3fc121..2bb282e4378 100644 --- a/src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml +++ b/src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml @@ -11,12 +11,16 @@ model: weight_decay: 0.0001 scheduler: - class_path: lightning.pytorch.cli.ReduceLROnPlateau + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + num_warmup_steps: 0 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 3 + monitor: val/accuracy engine: task: H_LABEL_CLS @@ -29,11 +33,12 @@ overrides: reset: - data.train_subset.transforms - max_epochs: 90 callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 + + max_epochs: 90 data: task: H_LABEL_CLS diff --git a/src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml b/src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml index 500cc168baa..9d3905bf28f 100644 --- a/src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml +++ b/src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml @@ -1,7 +1,11 @@ model: class_path: otx.algo.classification.timm_model.TimmModelForHLabelCls init_args: +<<<<<<< HEAD model_name: tf_efficientnetv2_s.in21k +======= + backbone: tf_efficientnetv2_s.in21k +>>>>>>> releases/2.2.0 optimizer: class_path: torch.optim.SGD @@ -10,6 +14,21 @@ model: momentum: 0.9 weight_decay: 0.0001 +<<<<<<< HEAD +======= + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 0 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 3 + monitor: val/accuracy + +>>>>>>> releases/2.2.0 engine: task: H_LABEL_CLS device: auto @@ -25,8 +44,16 @@ overrides: 
callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: +<<<<<<< HEAD patience: 3 warmup_iters: 750 +======= + warmup_iters: 750 + - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup + init_args: + patience: 5 + +>>>>>>> releases/2.2.0 data: task: H_LABEL_CLS data_format: datumaro diff --git a/src/otx/recipe/classification/h_label_cls/mobilenet_v3_large.yaml b/src/otx/recipe/classification/h_label_cls/mobilenet_v3_large.yaml index 211bc8fa883..c94b7dd16b6 100644 --- a/src/otx/recipe/classification/h_label_cls/mobilenet_v3_large.yaml +++ b/src/otx/recipe/classification/h_label_cls/mobilenet_v3_large.yaml @@ -19,7 +19,7 @@ model: init_args: mode: max factor: 0.5 - patience: 1 + patience: 3 monitor: val/accuracy engine: @@ -31,10 +31,11 @@ callback_monitor: val/accuracy data: ../../_base_/data/classification.yaml overrides: max_epochs: 90 + callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 data: task: H_LABEL_CLS diff --git a/src/otx/recipe/classification/h_label_cls/tv_efficientnet_b3.yaml b/src/otx/recipe/classification/h_label_cls/tv_efficientnet_b3.yaml index 2078c98b43b..d36cdfff5b7 100644 --- a/src/otx/recipe/classification/h_label_cls/tv_efficientnet_b3.yaml +++ b/src/otx/recipe/classification/h_label_cls/tv_efficientnet_b3.yaml @@ -12,10 +12,14 @@ model: weight_decay: 0.0001 scheduler: - class_path: torch.optim.lr_scheduler.CosineAnnealingLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - T_max: 100000 - eta_min: 0 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.CosineAnnealingLR + init_args: + T_max: 100000 + eta_min: 0 engine: task: H_LABEL_CLS @@ -29,10 +33,11 @@ overrides: - data.train_subset.transforms max_epochs: 90 + callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 
+ patience: 5 data: task: H_LABEL_CLS diff --git a/src/otx/recipe/classification/h_label_cls/tv_efficientnet_v2_l.yaml b/src/otx/recipe/classification/h_label_cls/tv_efficientnet_v2_l.yaml index 0f2d7b60a6a..9bec7e924e6 100644 --- a/src/otx/recipe/classification/h_label_cls/tv_efficientnet_v2_l.yaml +++ b/src/otx/recipe/classification/h_label_cls/tv_efficientnet_v2_l.yaml @@ -12,10 +12,14 @@ model: weight_decay: 0.0001 scheduler: - class_path: torch.optim.lr_scheduler.CosineAnnealingLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - T_max: 100000 - eta_min: 0 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.CosineAnnealingLR + init_args: + T_max: 100000 + eta_min: 0 engine: task: H_LABEL_CLS @@ -29,10 +33,11 @@ overrides: - data.train_subset.transforms max_epochs: 90 + callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 data: task: H_LABEL_CLS diff --git a/src/otx/recipe/classification/h_label_cls/tv_mobilenet_v3_small.yaml b/src/otx/recipe/classification/h_label_cls/tv_mobilenet_v3_small.yaml index faab071ff5d..d00a5109f7d 100644 --- a/src/otx/recipe/classification/h_label_cls/tv_mobilenet_v3_small.yaml +++ b/src/otx/recipe/classification/h_label_cls/tv_mobilenet_v3_small.yaml @@ -12,10 +12,14 @@ model: weight_decay: 0.0001 scheduler: - class_path: torch.optim.lr_scheduler.CosineAnnealingLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - T_max: 100000 - eta_min: 0 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.CosineAnnealingLR + init_args: + T_max: 100000 + eta_min: 0 engine: task: H_LABEL_CLS @@ -29,10 +33,11 @@ overrides: - data.train_subset.transforms max_epochs: 90 + callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 data: task: H_LABEL_CLS diff --git 
a/src/otx/recipe/classification/multi_class_cls/deit_tiny.yaml b/src/otx/recipe/classification/multi_class_cls/deit_tiny.yaml index f5446d3cca6..cdc06e19f52 100644 --- a/src/otx/recipe/classification/multi_class_cls/deit_tiny.yaml +++ b/src/otx/recipe/classification/multi_class_cls/deit_tiny.yaml @@ -12,12 +12,16 @@ model: weight_decay: 0.05 scheduler: - class_path: lightning.pytorch.cli.ReduceLROnPlateau + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + num_warmup_steps: 0 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 3 + monitor: val/accuracy engine: task: MULTI_CLASS_CLS @@ -28,7 +32,8 @@ callback_monitor: val/accuracy data: ../../_base_/data/classification.yaml overrides: max_epochs: 90 + callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 diff --git a/src/otx/recipe/classification/multi_class_cls/dino_v2.yaml b/src/otx/recipe/classification/multi_class_cls/dino_v2.yaml index 300091fab8c..a11967f1068 100644 --- a/src/otx/recipe/classification/multi_class_cls/dino_v2.yaml +++ b/src/otx/recipe/classification/multi_class_cls/dino_v2.yaml @@ -11,12 +11,16 @@ model: weight_decay: 0.05 scheduler: - class_path: lightning.pytorch.cli.ReduceLROnPlateau + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + num_warmup_steps: 0 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 3 + monitor: val/accuracy engine: task: MULTI_CLASS_CLS @@ -27,7 +31,8 @@ callback_monitor: val/accuracy data: ../../_base_/data/classification.yaml overrides: max_epochs: 90 + callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - 
patience: 3 + patience: 5 diff --git a/src/otx/recipe/classification/multi_class_cls/efficientnet_b0.yaml b/src/otx/recipe/classification/multi_class_cls/efficientnet_b0.yaml index 428fb89055b..fe6244853d6 100644 --- a/src/otx/recipe/classification/multi_class_cls/efficientnet_b0.yaml +++ b/src/otx/recipe/classification/multi_class_cls/efficientnet_b0.yaml @@ -12,12 +12,16 @@ model: weight_decay: 0.0001 scheduler: - class_path: lightning.pytorch.cli.ReduceLROnPlateau + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + num_warmup_steps: 0 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 3 + monitor: val/accuracy engine: task: MULTI_CLASS_CLS @@ -30,11 +34,12 @@ overrides: reset: - data.train_subset.transforms - max_epochs: 90 callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 + + max_epochs: 90 data: train_subset: diff --git a/src/otx/recipe/classification/multi_class_cls/efficientnet_v2.yaml b/src/otx/recipe/classification/multi_class_cls/efficientnet_v2.yaml index 2454c0e7094..58fa2ba0308 100644 --- a/src/otx/recipe/classification/multi_class_cls/efficientnet_v2.yaml +++ b/src/otx/recipe/classification/multi_class_cls/efficientnet_v2.yaml @@ -2,7 +2,11 @@ model: class_path: otx.algo.classification.timm_model.TimmModelForMulticlassCls init_args: label_info: 1000 +<<<<<<< HEAD model_name: tf_efficientnetv2_s.in21k +======= + backbone: tf_efficientnetv2_s.in21k +>>>>>>> releases/2.2.0 optimizer: class_path: torch.optim.SGD @@ -12,12 +16,16 @@ model: weight_decay: 0.0001 scheduler: - class_path: lightning.pytorch.cli.ReduceLROnPlateau + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + num_warmup_steps: 0 + main_scheduler_callable: + 
class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 3 + monitor: val/accuracy engine: task: MULTI_CLASS_CLS @@ -30,11 +38,12 @@ overrides: reset: - data.train_subset.transforms - max_epochs: 90 callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 + + max_epochs: 90 data: train_subset: diff --git a/src/otx/recipe/classification/multi_class_cls/mobilenet_v3_large.yaml b/src/otx/recipe/classification/multi_class_cls/mobilenet_v3_large.yaml index c4c6946fd6e..fdef97ef9c8 100644 --- a/src/otx/recipe/classification/multi_class_cls/mobilenet_v3_large.yaml +++ b/src/otx/recipe/classification/multi_class_cls/mobilenet_v3_large.yaml @@ -20,7 +20,7 @@ model: init_args: mode: max factor: 0.5 - patience: 1 + patience: 3 monitor: val/accuracy engine: @@ -32,7 +32,8 @@ callback_monitor: val/accuracy data: ../../_base_/data/classification.yaml overrides: max_epochs: 90 + callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 diff --git a/src/otx/recipe/classification/multi_class_cls/semisl/efficientnet_v2_semisl.yml b/src/otx/recipe/classification/multi_class_cls/semisl/efficientnet_v2_semisl.yml index 1187d41d3ba..eedf11530aa 100644 --- a/src/otx/recipe/classification/multi_class_cls/semisl/efficientnet_v2_semisl.yml +++ b/src/otx/recipe/classification/multi_class_cls/semisl/efficientnet_v2_semisl.yml @@ -2,7 +2,11 @@ model: class_path: otx.algo.classification.timm_model.TimmModelForMulticlassCls init_args: label_info: 1000 +<<<<<<< HEAD:src/otx/recipe/classification/multi_class_cls/semisl/efficientnet_v2_semisl.yml model_name: tf_efficientnetv2_s.in21k +======= + backbone: tf_efficientnetv2_s.in21k +>>>>>>> releases/2.2.0:src/otx/recipe/classification/multi_class_cls/semisl/efficientnet_v2_semisl.yaml train_type: SEMI_SUPERVISED optimizer: diff --git 
a/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_b3.yaml b/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_b3.yaml index f06b3b36e32..ccd26a6535e 100644 --- a/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_b3.yaml +++ b/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_b3.yaml @@ -12,10 +12,14 @@ model: weight_decay: 0.0001 scheduler: - class_path: torch.optim.lr_scheduler.CosineAnnealingLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - T_max: 100000 - eta_min: 0 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.CosineAnnealingLR + init_args: + T_max: 100000 + eta_min: 0 engine: task: MULTI_CLASS_CLS @@ -26,7 +30,8 @@ callback_monitor: val/accuracy data: ../../_base_/data/classification.yaml overrides: max_epochs: 90 + callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 diff --git a/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_v2_l.yaml b/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_v2_l.yaml index c72714e9433..06d702e8576 100644 --- a/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_v2_l.yaml +++ b/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_v2_l.yaml @@ -12,10 +12,14 @@ model: weight_decay: 0.0001 scheduler: - class_path: torch.optim.lr_scheduler.CosineAnnealingLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - T_max: 100000 - eta_min: 0 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.CosineAnnealingLR + init_args: + T_max: 100000 + eta_min: 0 engine: task: MULTI_CLASS_CLS @@ -26,7 +30,8 @@ callback_monitor: val/accuracy data: ../../_base_/data/classification.yaml overrides: max_epochs: 90 + callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 diff --git 
a/src/otx/recipe/classification/multi_class_cls/tv_mobilenet_v3_small.yaml b/src/otx/recipe/classification/multi_class_cls/tv_mobilenet_v3_small.yaml index 4c6975c241a..9d626812765 100644 --- a/src/otx/recipe/classification/multi_class_cls/tv_mobilenet_v3_small.yaml +++ b/src/otx/recipe/classification/multi_class_cls/tv_mobilenet_v3_small.yaml @@ -12,10 +12,14 @@ model: weight_decay: 0.0001 scheduler: - class_path: torch.optim.lr_scheduler.CosineAnnealingLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - T_max: 100000 - eta_min: 0 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.CosineAnnealingLR + init_args: + T_max: 100000 + eta_min: 0 engine: task: MULTI_CLASS_CLS @@ -26,7 +30,8 @@ callback_monitor: val/accuracy data: ../../_base_/data/classification.yaml overrides: max_epochs: 90 + callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 diff --git a/src/otx/recipe/classification/multi_label_cls/deit_tiny.yaml b/src/otx/recipe/classification/multi_label_cls/deit_tiny.yaml index afb14dd046f..623bc178f4b 100644 --- a/src/otx/recipe/classification/multi_label_cls/deit_tiny.yaml +++ b/src/otx/recipe/classification/multi_label_cls/deit_tiny.yaml @@ -11,12 +11,16 @@ model: weight_decay: 0.05 scheduler: - class_path: lightning.pytorch.cli.ReduceLROnPlateau + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + num_warmup_steps: 0 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 3 + monitor: val/accuracy engine: task: MULTI_LABEL_CLS @@ -27,14 +31,11 @@ callback_monitor: val/accuracy data: ../../_base_/data/classification.yaml overrides: max_epochs: 200 + callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - 
patience: 4 - - class_path: otx.algo.callbacks.adaptive_train_scheduling.AdaptiveTrainScheduling - init_args: - min_earlystop_patience: 4 - min_lrschedule_patience: 3 + patience: 5 data: task: MULTI_LABEL_CLS diff --git a/src/otx/recipe/classification/multi_label_cls/efficientnet_b0.yaml b/src/otx/recipe/classification/multi_label_cls/efficientnet_b0.yaml index f3625158439..1859e0aa5fa 100644 --- a/src/otx/recipe/classification/multi_label_cls/efficientnet_b0.yaml +++ b/src/otx/recipe/classification/multi_label_cls/efficientnet_b0.yaml @@ -12,12 +12,16 @@ model: weight_decay: 0.0005 scheduler: - class_path: lightning.pytorch.cli.ReduceLROnPlateau + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + num_warmup_steps: 0 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 3 + monitor: val/accuracy engine: task: MULTI_LABEL_CLS @@ -30,11 +34,12 @@ overrides: reset: - data.train_subset.transforms - max_epochs: 200 callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 + + max_epochs: 200 data: task: MULTI_LABEL_CLS diff --git a/src/otx/recipe/classification/multi_label_cls/efficientnet_v2.yaml b/src/otx/recipe/classification/multi_label_cls/efficientnet_v2.yaml index a304d76542b..e1382981959 100644 --- a/src/otx/recipe/classification/multi_label_cls/efficientnet_v2.yaml +++ b/src/otx/recipe/classification/multi_label_cls/efficientnet_v2.yaml @@ -2,7 +2,11 @@ model: class_path: otx.algo.classification.timm_model.TimmModelForMultilabelCls init_args: label_info: 1000 +<<<<<<< HEAD model_name: tf_efficientnetv2_s.in21k +======= + backbone: tf_efficientnetv2_s.in21k +>>>>>>> releases/2.2.0 optimizer: class_path: torch.optim.SGD @@ -12,12 +16,16 @@ model: weight_decay: 0.0005 scheduler: - class_path: 
lightning.pytorch.cli.ReduceLROnPlateau + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + num_warmup_steps: 0 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 3 + monitor: val/accuracy engine: task: MULTI_LABEL_CLS @@ -30,15 +38,12 @@ overrides: reset: - data.train_subset.transforms - max_epochs: 200 callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 4 - - class_path: otx.algo.callbacks.adaptive_train_scheduling.AdaptiveTrainScheduling - init_args: - min_earlystop_patience: 4 - min_lrschedule_patience: 3 + patience: 5 + + max_epochs: 200 data: task: MULTI_LABEL_CLS diff --git a/src/otx/recipe/classification/multi_label_cls/mobilenet_v3_large.yaml b/src/otx/recipe/classification/multi_label_cls/mobilenet_v3_large.yaml index 02021708453..f9322f22f07 100644 --- a/src/otx/recipe/classification/multi_label_cls/mobilenet_v3_large.yaml +++ b/src/otx/recipe/classification/multi_label_cls/mobilenet_v3_large.yaml @@ -20,7 +20,7 @@ model: init_args: mode: max factor: 0.5 - patience: 1 + patience: 3 monitor: val/accuracy engine: @@ -31,11 +31,11 @@ callback_monitor: val/accuracy data: ../../_base_/data/classification.yaml overrides: - callbacks: - - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup - init_args: - patience: 3 - data: task: MULTI_LABEL_CLS data_format: datumaro + + callbacks: + - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup + init_args: + patience: 5 diff --git a/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_b3.yaml b/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_b3.yaml index 9579f8e5e57..ebc03324933 100644 --- a/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_b3.yaml +++ 
b/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_b3.yaml @@ -12,10 +12,14 @@ model: weight_decay: 0.0001 scheduler: - class_path: torch.optim.lr_scheduler.CosineAnnealingLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - T_max: 100000 - eta_min: 0 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.CosineAnnealingLR + init_args: + T_max: 100000 + eta_min: 0 engine: task: MULTI_LABEL_CLS @@ -31,7 +35,7 @@ overrides: callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 data: task: MULTI_LABEL_CLS diff --git a/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_v2_l.yaml b/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_v2_l.yaml index 3003b26eb48..a1992d2b398 100644 --- a/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_v2_l.yaml +++ b/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_v2_l.yaml @@ -12,10 +12,14 @@ model: weight_decay: 0.0001 scheduler: - class_path: torch.optim.lr_scheduler.CosineAnnealingLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - T_max: 100000 - eta_min: 0 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.CosineAnnealingLR + init_args: + T_max: 100000 + eta_min: 0 engine: task: MULTI_LABEL_CLS @@ -31,11 +35,7 @@ overrides: callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 4 - - class_path: otx.algo.callbacks.adaptive_train_scheduling.AdaptiveTrainScheduling - init_args: - min_earlystop_patience: 4 - min_lrschedule_patience: 3 + patience: 5 data: task: MULTI_LABEL_CLS diff --git a/src/otx/recipe/classification/multi_label_cls/tv_mobilenet_v3_small.yaml b/src/otx/recipe/classification/multi_label_cls/tv_mobilenet_v3_small.yaml index 492e835ef62..99ef63b59e1 100644 --- 
a/src/otx/recipe/classification/multi_label_cls/tv_mobilenet_v3_small.yaml +++ b/src/otx/recipe/classification/multi_label_cls/tv_mobilenet_v3_small.yaml @@ -12,10 +12,14 @@ model: weight_decay: 0.0001 scheduler: - class_path: torch.optim.lr_scheduler.CosineAnnealingLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - T_max: 100000 - eta_min: 0 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.CosineAnnealingLR + init_args: + T_max: 100000 + eta_min: 0 engine: task: MULTI_LABEL_CLS @@ -31,7 +35,7 @@ overrides: callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 data: task: MULTI_LABEL_CLS diff --git a/src/otx/recipe/detection/atss_mobilenetv2.yaml b/src/otx/recipe/detection/atss_mobilenetv2.yaml index adabd373f1e..ee8925cfce6 100644 --- a/src/otx/recipe/detection/atss_mobilenetv2.yaml +++ b/src/otx/recipe/detection/atss_mobilenetv2.yaml @@ -14,7 +14,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml b/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml index 981aae94fdb..6305ed7345e 100644 --- a/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml +++ b/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml @@ -14,7 +14,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/atss_resnext101.yaml b/src/otx/recipe/detection/atss_resnext101.yaml index 0a6bb28bbde..290c47ab5cf 100644 --- a/src/otx/recipe/detection/atss_resnext101.yaml +++ b/src/otx/recipe/detection/atss_resnext101.yaml @@ 
-14,7 +14,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/atss_resnext101_tile.yaml b/src/otx/recipe/detection/atss_resnext101_tile.yaml index 8f78195f637..fbb297212b7 100644 --- a/src/otx/recipe/detection/atss_resnext101_tile.yaml +++ b/src/otx/recipe/detection/atss_resnext101_tile.yaml @@ -1,7 +1,12 @@ model: +<<<<<<< HEAD class_path: otx.algo.detection.atss.ATSS init_args: model_name: atss_resnext101 +======= + class_path: otx.algo.detection.atss.ResNeXt101ATSS + init_args: +>>>>>>> releases/2.2.0 label_info: 80 optimizer: @@ -14,7 +19,11 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: +<<<<<<< HEAD num_warmup_steps: 3 +======= + num_warmup_steps: 0 +>>>>>>> releases/2.2.0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/rtdetr_101.yaml b/src/otx/recipe/detection/rtdetr_101.yaml index d8c49788990..5b35ba9c125 100644 --- a/src/otx/recipe/detection/rtdetr_101.yaml +++ b/src/otx/recipe/detection/rtdetr_101.yaml @@ -55,8 +55,14 @@ overrides: - class_path: torchvision.transforms.v2.RandomPhotometricDistort init_args: p: 0.5 +<<<<<<< HEAD - class_path: otx.core.data.transform_libs.torchvision.PhotoMetricDistortion enable: false +======= + - class_path: otx.core.data.transform_libs.torchvision.RandomFlip + init_args: + prob: 0.5 +>>>>>>> releases/2.2.0 - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: scale: $(input_size) diff --git a/src/otx/recipe/detection/rtdetr_101_tile.yaml b/src/otx/recipe/detection/rtdetr_101_tile.yaml index 918a173dedf..b865a225cea 100644 --- a/src/otx/recipe/detection/rtdetr_101_tile.yaml +++ b/src/otx/recipe/detection/rtdetr_101_tile.yaml @@ -1,7 +1,12 @@ model: +<<<<<<< HEAD 
class_path: otx.algo.detection.rtdetr.RTDETR init_args: model_name: rtdetr_101 +======= + class_path: otx.algo.detection.rtdetr.RTDETR101 + init_args: +>>>>>>> releases/2.2.0 label_info: 80 optimizer: @@ -14,7 +19,11 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: +<<<<<<< HEAD num_warmup_steps: 5 +======= + num_warmup_steps: 100 +>>>>>>> releases/2.2.0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: @@ -35,6 +44,7 @@ overrides: - class_path: otx.algo.callbacks.adaptive_train_scheduling.AdaptiveTrainScheduling init_args: max_interval: 1 +<<<<<<< HEAD decay: -0.025 min_lrschedule_patience: 3 - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup @@ -44,6 +54,11 @@ overrides: patience: 10 check_on_train_epoch_end: false min_delta: 0.001 +======= + min_lrschedule_patience: 3 + - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup + init_args: +>>>>>>> releases/2.2.0 warmup_iters: 100 warmup_epochs: 7 diff --git a/src/otx/recipe/detection/rtdetr_18.yaml b/src/otx/recipe/detection/rtdetr_18.yaml index 2af7186f556..fa4f3567cc5 100644 --- a/src/otx/recipe/detection/rtdetr_18.yaml +++ b/src/otx/recipe/detection/rtdetr_18.yaml @@ -54,8 +54,14 @@ overrides: - class_path: torchvision.transforms.v2.RandomPhotometricDistort init_args: p: 0.5 +<<<<<<< HEAD - class_path: otx.core.data.transform_libs.torchvision.PhotoMetricDistortion enable: false +======= + - class_path: otx.core.data.transform_libs.torchvision.RandomFlip + init_args: + prob: 0.5 +>>>>>>> releases/2.2.0 - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: scale: $(input_size) diff --git a/src/otx/recipe/detection/rtdetr_18_tile.yaml b/src/otx/recipe/detection/rtdetr_18_tile.yaml index d79091eb56c..a2ec9f5e4de 100644 --- a/src/otx/recipe/detection/rtdetr_18_tile.yaml +++ b/src/otx/recipe/detection/rtdetr_18_tile.yaml @@ -1,7 +1,12 @@ model: +<<<<<<< HEAD 
class_path: otx.algo.detection.rtdetr.RTDETR init_args: model_name: rtdetr_18 +======= + class_path: otx.algo.detection.rtdetr.RTDETR18 + init_args: +>>>>>>> releases/2.2.0 label_info: 80 optimizer: @@ -14,7 +19,11 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: +<<<<<<< HEAD num_warmup_steps: 5 +======= + num_warmup_steps: 0 +>>>>>>> releases/2.2.0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: @@ -34,6 +43,7 @@ overrides: - class_path: otx.algo.callbacks.adaptive_train_scheduling.AdaptiveTrainScheduling init_args: max_interval: 1 +<<<<<<< HEAD decay: -0.025 min_lrschedule_patience: 3 - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup @@ -43,6 +53,11 @@ overrides: patience: 10 check_on_train_epoch_end: false min_delta: 0.001 +======= + min_lrschedule_patience: 3 + - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup + init_args: +>>>>>>> releases/2.2.0 warmup_iters: 100 warmup_epochs: 7 diff --git a/src/otx/recipe/detection/rtdetr_50.yaml b/src/otx/recipe/detection/rtdetr_50.yaml index 9d36388d91b..049f12ddf2e 100644 --- a/src/otx/recipe/detection/rtdetr_50.yaml +++ b/src/otx/recipe/detection/rtdetr_50.yaml @@ -55,8 +55,14 @@ overrides: - class_path: torchvision.transforms.v2.RandomPhotometricDistort init_args: p: 0.5 +<<<<<<< HEAD - class_path: otx.core.data.transform_libs.torchvision.PhotoMetricDistortion enable: false +======= + - class_path: otx.core.data.transform_libs.torchvision.RandomFlip + init_args: + prob: 0.5 +>>>>>>> releases/2.2.0 - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: scale: $(input_size) diff --git a/src/otx/recipe/detection/rtdetr_50_tile.yaml b/src/otx/recipe/detection/rtdetr_50_tile.yaml index 4c0bfdb1e64..9e12a4a2f7e 100644 --- a/src/otx/recipe/detection/rtdetr_50_tile.yaml +++ b/src/otx/recipe/detection/rtdetr_50_tile.yaml @@ -1,7 +1,12 @@ model: +<<<<<<< HEAD 
class_path: otx.algo.detection.rtdetr.RTDETR init_args: model_name: rtdetr_50 +======= + class_path: otx.algo.detection.rtdetr.RTDETR50 + init_args: +>>>>>>> releases/2.2.0 label_info: 80 optimizer: @@ -14,7 +19,11 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: +<<<<<<< HEAD num_warmup_steps: 5 +======= + num_warmup_steps: 100 +>>>>>>> releases/2.2.0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: @@ -35,6 +44,7 @@ overrides: - class_path: otx.algo.callbacks.adaptive_train_scheduling.AdaptiveTrainScheduling init_args: max_interval: 1 +<<<<<<< HEAD decay: -0.025 min_lrschedule_patience: 3 - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup @@ -44,6 +54,11 @@ overrides: patience: 10 check_on_train_epoch_end: false min_delta: 0.001 +======= + min_lrschedule_patience: 3 + - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup + init_args: +>>>>>>> releases/2.2.0 warmup_iters: 100 warmup_epochs: 7 diff --git a/src/otx/recipe/detection/rtmdet_tiny.yaml b/src/otx/recipe/detection/rtmdet_tiny.yaml index 69b6eef9978..73eec5d3016 100644 --- a/src/otx/recipe/detection/rtmdet_tiny.yaml +++ b/src/otx/recipe/detection/rtmdet_tiny.yaml @@ -13,7 +13,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/rtmdet_tiny_tile.yaml b/src/otx/recipe/detection/rtmdet_tiny_tile.yaml index 716f5151e8f..21686f1ccd3 100644 --- a/src/otx/recipe/detection/rtmdet_tiny_tile.yaml +++ b/src/otx/recipe/detection/rtmdet_tiny_tile.yaml @@ -1,7 +1,12 @@ model: +<<<<<<< HEAD class_path: otx.algo.detection.rtmdet.RTMDet init_args: model_name: rtmdet_tiny +======= + class_path: otx.algo.detection.rtmdet.RTMDetTiny + init_args: +>>>>>>> releases/2.2.0 
label_info: 80 optimizer: @@ -13,7 +18,11 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: +<<<<<<< HEAD num_warmup_steps: 3 +======= + num_warmup_steps: 0 +>>>>>>> releases/2.2.0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/ssd_mobilenetv2.yaml b/src/otx/recipe/detection/ssd_mobilenetv2.yaml index 5b3ace2b81c..35cc1135d40 100644 --- a/src/otx/recipe/detection/ssd_mobilenetv2.yaml +++ b/src/otx/recipe/detection/ssd_mobilenetv2.yaml @@ -14,7 +14,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml b/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml index 620b55107b2..45d57954753 100644 --- a/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml +++ b/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml @@ -14,7 +14,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/yolox_l.yaml b/src/otx/recipe/detection/yolox_l.yaml index ed0ecfeec41..d457e2b9207 100644 --- a/src/otx/recipe/detection/yolox_l.yaml +++ b/src/otx/recipe/detection/yolox_l.yaml @@ -14,7 +14,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/yolox_l_tile.yaml b/src/otx/recipe/detection/yolox_l_tile.yaml index d9740706813..c5d4bf7210e 100644 --- a/src/otx/recipe/detection/yolox_l_tile.yaml +++ 
b/src/otx/recipe/detection/yolox_l_tile.yaml @@ -14,7 +14,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/yolox_s.yaml b/src/otx/recipe/detection/yolox_s.yaml index 460b8c5b1bd..e547174e4e0 100644 --- a/src/otx/recipe/detection/yolox_s.yaml +++ b/src/otx/recipe/detection/yolox_s.yaml @@ -14,7 +14,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/yolox_s_tile.yaml b/src/otx/recipe/detection/yolox_s_tile.yaml index 967639269b2..e3138139e4b 100644 --- a/src/otx/recipe/detection/yolox_s_tile.yaml +++ b/src/otx/recipe/detection/yolox_s_tile.yaml @@ -14,7 +14,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/yolox_tiny.yaml b/src/otx/recipe/detection/yolox_tiny.yaml index 86395c02f0d..fa954fb257c 100644 --- a/src/otx/recipe/detection/yolox_tiny.yaml +++ b/src/otx/recipe/detection/yolox_tiny.yaml @@ -14,7 +14,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/yolox_tiny_tile.yaml b/src/otx/recipe/detection/yolox_tiny_tile.yaml index 6b1c012e642..70c0c97cf04 100644 --- a/src/otx/recipe/detection/yolox_tiny_tile.yaml +++ b/src/otx/recipe/detection/yolox_tiny_tile.yaml @@ -14,7 +14,7 @@ model: scheduler: class_path: 
otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: @@ -43,6 +43,10 @@ overrides: num_workers: 4 batch_size: 8 transforms: +<<<<<<< HEAD +======= + - class_path: otx.core.data.transform_libs.torchvision.YOLOXHSVRandomAug +>>>>>>> releases/2.2.0 - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: scale: $(input_size) diff --git a/src/otx/recipe/detection/yolox_x.yaml b/src/otx/recipe/detection/yolox_x.yaml index 4364cde6acf..a254eb755ea 100644 --- a/src/otx/recipe/detection/yolox_x.yaml +++ b/src/otx/recipe/detection/yolox_x.yaml @@ -14,7 +14,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/yolox_x_tile.yaml b/src/otx/recipe/detection/yolox_x_tile.yaml index b3dcf395c96..69751c03db0 100644 --- a/src/otx/recipe/detection/yolox_x_tile.yaml +++ b/src/otx/recipe/detection/yolox_x_tile.yaml @@ -14,7 +14,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/semantic_segmentation/dino_v2.yaml b/src/otx/recipe/semantic_segmentation/dino_v2.yaml index 59302d244b9..a3c074cd585 100644 --- a/src/otx/recipe/semantic_segmentation/dino_v2.yaml +++ b/src/otx/recipe/semantic_segmentation/dino_v2.yaml @@ -2,10 +2,10 @@ model: class_path: otx.algo.segmentation.dino_v2_seg.DinoV2Seg init_args: label_info: 2 - model_name: dinov2_vits14 + model_name: dinov2-small-seg input_size: - - 560 - - 560 + - 518 + - 518 optimizer: class_path: torch.optim.AdamW @@ -17,11 +17,15 @@ model: weight_decay: 0.0001 scheduler: - 
class_path: torch.optim.lr_scheduler.PolynomialLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - total_iters: 150 - power: 0.9 - last_epoch: -1 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.PolynomialLR + init_args: + total_iters: 150 + power: 0.9 + last_epoch: -1 engine: task: SEMANTIC_SEGMENTATION @@ -33,8 +37,8 @@ data: ../_base_/data/semantic_segmentation.yaml overrides: data: input_size: - - 560 - - 560 + - 518 + - 518 train_subset: transforms: - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop diff --git a/src/otx/recipe/semantic_segmentation/semisl/dino_v2_semisl.yml b/src/otx/recipe/semantic_segmentation/semisl/dino_v2_semisl.yml index 721085499db..da9a62fa4be 100644 --- a/src/otx/recipe/semantic_segmentation/semisl/dino_v2_semisl.yml +++ b/src/otx/recipe/semantic_segmentation/semisl/dino_v2_semisl.yml @@ -2,11 +2,11 @@ model: class_path: otx.algo.segmentation.dino_v2_seg.DinoV2Seg init_args: label_info: 2 - model_name: dinov2_vits14 + model_version: dinov2-small-seg train_type: SEMI_SUPERVISED input_size: - - 560 - - 560 + - 518 + - 518 optimizer: class_path: torch.optim.AdamW @@ -34,8 +34,8 @@ data: ../../_base_/data/semisl/semantic_segmentation_semisl.yaml overrides: data: input_size: - - 560 - - 560 + - 518 + - 518 train_subset: transforms: - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop diff --git a/src/otx/recipe/semantic_segmentation/semisl/litehrnet_x_semisl.yml b/src/otx/recipe/semantic_segmentation/semisl/litehrnet_x_semisl.yml index 5abb0004bac..2c0ca578cb8 100644 --- a/src/otx/recipe/semantic_segmentation/semisl/litehrnet_x_semisl.yml +++ b/src/otx/recipe/semantic_segmentation/semisl/litehrnet_x_semisl.yml @@ -4,6 +4,7 @@ model: label_info: 2 model_name: lite_hrnet_x train_type: SEMI_SUPERVISED + drop_unreliable_pixels_percent: 80 optimizer: class_path: torch.optim.Adam diff --git 
a/src/otx/recipe/visual_prompting/sam_tiny_vit.yaml b/src/otx/recipe/visual_prompting/sam_tiny_vit.yaml index 377d80b3722..4c8646bab0a 100644 --- a/src/otx/recipe/visual_prompting/sam_tiny_vit.yaml +++ b/src/otx/recipe/visual_prompting/sam_tiny_vit.yaml @@ -18,10 +18,14 @@ model: lr: 0.00001 scheduler: - class_path: torch.optim.lr_scheduler.ConstantLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - factor: 1 - total_iters: -1 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.ConstantLR + init_args: + factor: 1 + total_iters: -1 engine: task: VISUAL_PROMPTING @@ -35,4 +39,4 @@ overrides: callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 diff --git a/src/otx/recipe/visual_prompting/sam_vit_b.yaml b/src/otx/recipe/visual_prompting/sam_vit_b.yaml index bc3bf89351a..4493af39562 100644 --- a/src/otx/recipe/visual_prompting/sam_vit_b.yaml +++ b/src/otx/recipe/visual_prompting/sam_vit_b.yaml @@ -18,10 +18,14 @@ model: lr: 0.00001 scheduler: - class_path: torch.optim.lr_scheduler.ConstantLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - factor: 1 - total_iters: -1 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.ConstantLR + init_args: + factor: 1 + total_iters: -1 engine: task: VISUAL_PROMPTING @@ -35,4 +39,4 @@ overrides: callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 diff --git a/src/otx/tools/converter.py b/src/otx/tools/converter.py index d8f26bc72a3..1e670930cf0 100644 --- a/src/otx/tools/converter.py +++ b/src/otx/tools/converter.py @@ -248,6 +248,9 @@ def convert(config_path: str, task: OTXTaskType | None = None) -> dict: task_info = TEMPLATE_ID_DICT[template_config["model_template_id"]] if param_dict.get("enable_tiling", None) and not 
task_info["model_name"].endswith("_tile"): task_info["model_name"] += "_tile" + # classification task type can't be deducted from template name, try to extract from config + if "sub_task_type" in template_config and "_CLS" in task_info["task"]: + task_info["task"] = template_config["sub_task_type"] if task is not None: task_info["task"] = task default_config = ConfigConverter._get_default_config(task_info) @@ -317,13 +320,16 @@ def update_num_workers(param_value: int) -> None: config["data"]["test_subset"]["num_workers"] = param_value def update_enable_early_stopping(param_value: bool) -> None: - idx = ConfigConverter._get_callback_idx(config["callbacks"], "lightning.pytorch.callbacks.EarlyStopping") + idx = ConfigConverter._get_callback_idx( + config["callbacks"], + "otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup", + ) if not param_value and idx > -1: config["callbacks"].pop(idx) def update_early_stop_patience(param_value: int) -> None: for callback in config["callbacks"]: - if callback["class_path"] == "lightning.pytorch.callbacks.EarlyStopping": + if callback["class_path"] == "otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup": callback["init_args"]["patience"] = param_value break diff --git a/src/otx/tools/templates/classification/configuration.yaml b/src/otx/tools/templates/classification/configuration.yaml index ed91ea1cfa3..93f972b7a87 100644 --- a/src/otx/tools/templates/classification/configuration.yaml +++ b/src/otx/tools/templates/classification/configuration.yaml @@ -87,11 +87,11 @@ learning_parameters: num_iters: affects_outcome_of: TRAINING default_value: 200 - description: - Increasing this value causes the results to be more robust but training - time will be longer. + description: Maximum number of epochs to train a model. + Increasing this value may result in longer training, but potentially in a more robust model. 
+ Note, if the early stopping is enabled, the actual number of epochs may be less than this value. editable: true - header: Number of training iterations + header: Number of training epochs max_value: 1000 min_value: 1 type: INTEGER @@ -174,7 +174,7 @@ learning_parameters: visible_in_ui: false early_stop_patience: affects_outcome_of: TRAINING - default_value: 3 + default_value: 5 description: Training will stop if the model does not improve within the number of epochs of patience. editable: true header: Patience for early stopping @@ -186,26 +186,7 @@ learning_parameters: operator: AND rules: [] type: UI_RULES - value: 8 - visible_in_ui: true - warning: This is applied exclusively when early stopping is enabled. - early_stop_iteration_patience: - affects_outcome_of: TRAINING - default_value: 0 - description: - Training will stop if the model does not improve within the number of iterations of patience. - This ensures the model is trained enough with the number of iterations of patience before early stopping. - editable: true - header: Iteration patience for early stopping - max_value: 1000 - min_value: 0 - type: INTEGER - ui_rules: - action: DISABLE_EDITING - operator: AND - rules: [] - type: UI_RULES - value: 0 + value: 5 visible_in_ui: true warning: This is applied exclusively when early stopping is enabled. use_adaptive_interval: @@ -264,7 +245,7 @@ learning_parameters: auto_num_workers: affects_outcome_of: TRAINING default_value: false - description: Adapt num_workers according to current hardware status automatically. + description: Adapt number of workers according to current hardware status automatically. 
editable: true header: Enable auto adaptive num_workers type: BOOLEAN diff --git a/src/otx/tools/templates/classification/efficientnet_v2_l/template.yaml b/src/otx/tools/templates/classification/efficientnet_v2_l/template.yaml index 4db892a3131..001e1e3d995 100644 --- a/src/otx/tools/templates/classification/efficientnet_v2_l/template.yaml +++ b/src/otx/tools/templates/classification/efficientnet_v2_l/template.yaml @@ -20,7 +20,7 @@ hyper_parameters: parameter_overrides: learning_parameters: batch_size: - default_value: 64 + default_value: 48 auto_hpo_state: POSSIBLE learning_rate: default_value: 0.01 diff --git a/src/otx/tools/templates/detection/detection/configuration.yaml b/src/otx/tools/templates/detection/detection/configuration.yaml index 5cb11d83c9f..9fe02a3d28a 100644 --- a/src/otx/tools/templates/detection/detection/configuration.yaml +++ b/src/otx/tools/templates/detection/detection/configuration.yaml @@ -91,11 +91,11 @@ learning_parameters: num_iters: affects_outcome_of: TRAINING default_value: 200 - description: - Increasing this value causes the results to be more robust but training - time will be longer. + description: Maximum number of epochs to train a model. + Increasing this value may result in longer training, but potentially in a more robust model. + Note, if the early stopping is enabled, the actual number of epochs may be less than this value. editable: true - header: Number of training iterations + header: Number of training epochs max_value: 1000 min_value: 1 type: INTEGER @@ -124,7 +124,7 @@ learning_parameters: operator: AND rules: [] type: UI_RULES - value: 0 + value: 2 visible_in_ui: true warning: null enable_early_stopping: @@ -173,25 +173,6 @@ learning_parameters: value: 10 visible_in_ui: true warning: This is applied exclusively when early stopping is enabled. 
- early_stop_iteration_patience: - affects_outcome_of: TRAINING - default_value: 0 - description: - Training will stop if the model does not improve within the number of iterations of patience. - This ensures the model is trained enough with the number of iterations of patience before early stopping. - editable: true - header: Iteration patience for early stopping - max_value: 1000 - min_value: 0 - type: INTEGER - ui_rules: - action: DISABLE_EDITING - operator: AND - rules: [] - type: UI_RULES - value: 0 - visible_in_ui: true - warning: This is applied exclusively when early stopping is enabled. use_adaptive_interval: affects_outcome_of: TRAINING default_value: true @@ -232,7 +213,7 @@ learning_parameters: auto_num_workers: affects_outcome_of: TRAINING default_value: false - description: Adapt num_workers according to current hardware status automatically. + description: Adapt number of workers according to current hardware status automatically. editable: true header: Enable auto adaptive num_workers type: BOOLEAN diff --git a/src/otx/tools/templates/detection/detection/cspdarknet_yolox_l/template.yaml b/src/otx/tools/templates/detection/detection/cspdarknet_yolox_l/template.yaml index c161471d452..4be2e224f8a 100644 --- a/src/otx/tools/templates/detection/detection/cspdarknet_yolox_l/template.yaml +++ b/src/otx/tools/templates/detection/detection/cspdarknet_yolox_l/template.yaml @@ -28,7 +28,11 @@ hyper_parameters: default_value: 0.001 auto_hpo_state: POSSIBLE learning_rate_warmup_iters: +<<<<<<< HEAD default_value: 3 +======= + default_value: 0 +>>>>>>> releases/2.2.0 num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/detection/cspdarknet_yolox_s/template.yaml b/src/otx/tools/templates/detection/detection/cspdarknet_yolox_s/template.yaml index f8ef1d4acd3..c52b6437683 100644 --- a/src/otx/tools/templates/detection/detection/cspdarknet_yolox_s/template.yaml +++ b/src/otx/tools/templates/detection/detection/cspdarknet_yolox_s/template.yaml 
@@ -28,7 +28,11 @@ hyper_parameters: default_value: 0.001 auto_hpo_state: POSSIBLE learning_rate_warmup_iters: +<<<<<<< HEAD default_value: 3 +======= + default_value: 0 +>>>>>>> releases/2.2.0 num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/detection/cspdarknet_yolox_tiny/template.yaml b/src/otx/tools/templates/detection/detection/cspdarknet_yolox_tiny/template.yaml index e2e426840ed..788781a95bf 100644 --- a/src/otx/tools/templates/detection/detection/cspdarknet_yolox_tiny/template.yaml +++ b/src/otx/tools/templates/detection/detection/cspdarknet_yolox_tiny/template.yaml @@ -28,7 +28,11 @@ hyper_parameters: default_value: 0.0002 auto_hpo_state: POSSIBLE learning_rate_warmup_iters: +<<<<<<< HEAD default_value: 3 +======= + default_value: 0 +>>>>>>> releases/2.2.0 num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/detection/cspdarknet_yolox_x/template.yaml b/src/otx/tools/templates/detection/detection/cspdarknet_yolox_x/template.yaml index 97f85fed008..111b5326a5a 100644 --- a/src/otx/tools/templates/detection/detection/cspdarknet_yolox_x/template.yaml +++ b/src/otx/tools/templates/detection/detection/cspdarknet_yolox_x/template.yaml @@ -28,7 +28,11 @@ hyper_parameters: default_value: 0.001 auto_hpo_state: POSSIBLE learning_rate_warmup_iters: +<<<<<<< HEAD default_value: 3 +======= + default_value: 0 +>>>>>>> releases/2.2.0 num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/detection/mobilenetv2_atss/template.yaml b/src/otx/tools/templates/detection/detection/mobilenetv2_atss/template.yaml index 94dd429e1f1..a650385b964 100644 --- a/src/otx/tools/templates/detection/detection/mobilenetv2_atss/template.yaml +++ b/src/otx/tools/templates/detection/detection/mobilenetv2_atss/template.yaml @@ -28,7 +28,11 @@ hyper_parameters: default_value: 0.004 auto_hpo_state: POSSIBLE learning_rate_warmup_iters: +<<<<<<< HEAD default_value: 3 +======= + default_value: 0 +>>>>>>> releases/2.2.0 
num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/detection/mobilenetv2_ssd/template.yaml b/src/otx/tools/templates/detection/detection/mobilenetv2_ssd/template.yaml index 3cdde945a08..88313f67c18 100644 --- a/src/otx/tools/templates/detection/detection/mobilenetv2_ssd/template.yaml +++ b/src/otx/tools/templates/detection/detection/mobilenetv2_ssd/template.yaml @@ -28,7 +28,11 @@ hyper_parameters: default_value: 0.01 auto_hpo_state: POSSIBLE learning_rate_warmup_iters: +<<<<<<< HEAD default_value: 3 +======= + default_value: 0 +>>>>>>> releases/2.2.0 num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/detection/resnext101_atss/template.yaml b/src/otx/tools/templates/detection/detection/resnext101_atss/template.yaml index cf12454e78d..922da7a8003 100644 --- a/src/otx/tools/templates/detection/detection/resnext101_atss/template.yaml +++ b/src/otx/tools/templates/detection/detection/resnext101_atss/template.yaml @@ -28,7 +28,11 @@ hyper_parameters: default_value: 0.004 auto_hpo_state: POSSIBLE learning_rate_warmup_iters: +<<<<<<< HEAD default_value: 3 +======= + default_value: 0 +>>>>>>> releases/2.2.0 num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/detection/rtdetr_101/template.yaml b/src/otx/tools/templates/detection/detection/rtdetr_101/template.yaml index 1394cf44159..a86c1db66fe 100644 --- a/src/otx/tools/templates/detection/detection/rtdetr_101/template.yaml +++ b/src/otx/tools/templates/detection/detection/rtdetr_101/template.yaml @@ -27,8 +27,11 @@ hyper_parameters: learning_rate: default_value: 0.0001 auto_hpo_state: POSSIBLE +<<<<<<< HEAD learning_rate_warmup_iters: default_value: 100 +======= +>>>>>>> releases/2.2.0 num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/detection/rtdetr_18/template.yaml b/src/otx/tools/templates/detection/detection/rtdetr_18/template.yaml index 7738c65f1b7..0cefdee4c57 100644 --- 
a/src/otx/tools/templates/detection/detection/rtdetr_18/template.yaml +++ b/src/otx/tools/templates/detection/detection/rtdetr_18/template.yaml @@ -27,8 +27,11 @@ hyper_parameters: learning_rate: default_value: 0.0001 auto_hpo_state: POSSIBLE +<<<<<<< HEAD learning_rate_warmup_iters: default_value: 100 +======= +>>>>>>> releases/2.2.0 num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/detection/rtdetr_50/template.yaml b/src/otx/tools/templates/detection/detection/rtdetr_50/template.yaml index 408e48cd8fb..c85fdc8c4ce 100644 --- a/src/otx/tools/templates/detection/detection/rtdetr_50/template.yaml +++ b/src/otx/tools/templates/detection/detection/rtdetr_50/template.yaml @@ -27,8 +27,11 @@ hyper_parameters: learning_rate: default_value: 0.0001 auto_hpo_state: POSSIBLE +<<<<<<< HEAD learning_rate_warmup_iters: default_value: 100 +======= +>>>>>>> releases/2.2.0 num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/detection/rtmdet_tiny/template.yaml b/src/otx/tools/templates/detection/detection/rtmdet_tiny/template.yaml index 8b110503b62..f492ac3d240 100644 --- a/src/otx/tools/templates/detection/detection/rtmdet_tiny/template.yaml +++ b/src/otx/tools/templates/detection/detection/rtmdet_tiny/template.yaml @@ -28,7 +28,11 @@ hyper_parameters: default_value: 0.0007 auto_hpo_state: POSSIBLE learning_rate_warmup_iters: +<<<<<<< HEAD default_value: 3 +======= + default_value: 0 +>>>>>>> releases/2.2.0 num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/instance_segmentation/configuration.yaml b/src/otx/tools/templates/detection/instance_segmentation/configuration.yaml index a1c2078ed62..20421f3fd16 100644 --- a/src/otx/tools/templates/detection/instance_segmentation/configuration.yaml +++ b/src/otx/tools/templates/detection/instance_segmentation/configuration.yaml @@ -91,11 +91,11 @@ learning_parameters: num_iters: affects_outcome_of: TRAINING default_value: 200 - description: - Increasing this 
value causes the results to be more robust but training - time will be longer. + description: Maximum number of epochs to train a model. + Increasing this value may result in longer training, but potentially in a more robust model. + Note, if the early stopping is enabled, the actual number of epochs may be less than this value. editable: true - header: Number of training iterations + header: Number of training epochs max_value: 1000 min_value: 1 type: INTEGER @@ -173,25 +173,6 @@ learning_parameters: value: 10 visible_in_ui: true warning: This is applied exclusively when early stopping is enabled. - early_stop_iteration_patience: - affects_outcome_of: TRAINING - default_value: 0 - description: - Training will stop if the model does not improve within the number of iterations of patience. - This ensures the model is trained enough with the number of iterations of patience before early stopping. - editable: true - header: Iteration patience for early stopping - max_value: 1000 - min_value: 0 - type: INTEGER - ui_rules: - action: DISABLE_EDITING - operator: AND - rules: [] - type: UI_RULES - value: 0 - visible_in_ui: true - warning: This is applied exclusively when early stopping is enabled. use_adaptive_interval: affects_outcome_of: TRAINING default_value: true @@ -232,7 +213,7 @@ learning_parameters: auto_num_workers: affects_outcome_of: TRAINING default_value: false - description: Adapt num_workers according to current hardware status automatically. + description: Adapt number of workers according to current hardware status automatically. 
editable: true header: Enable auto adaptive num_workers type: BOOLEAN diff --git a/src/otx/tools/templates/detection/instance_segmentation/efficientnetb2b_maskrcnn/template.yaml b/src/otx/tools/templates/detection/instance_segmentation/efficientnetb2b_maskrcnn/template.yaml index 4b5e21a4f83..7d8b74ebad6 100644 --- a/src/otx/tools/templates/detection/instance_segmentation/efficientnetb2b_maskrcnn/template.yaml +++ b/src/otx/tools/templates/detection/instance_segmentation/efficientnetb2b_maskrcnn/template.yaml @@ -27,8 +27,11 @@ hyper_parameters: learning_rate: default_value: 0.015 auto_hpo_state: POSSIBLE +<<<<<<< HEAD learning_rate_warmup_iters: default_value: 100 +======= +>>>>>>> releases/2.2.0 num_iters: default_value: 100 diff --git a/src/otx/tools/templates/detection/instance_segmentation/maskrcnn_swin_t/template.yaml b/src/otx/tools/templates/detection/instance_segmentation/maskrcnn_swin_t/template.yaml index e9e289c6bf6..e90824fd359 100644 --- a/src/otx/tools/templates/detection/instance_segmentation/maskrcnn_swin_t/template.yaml +++ b/src/otx/tools/templates/detection/instance_segmentation/maskrcnn_swin_t/template.yaml @@ -27,8 +27,11 @@ hyper_parameters: learning_rate: default_value: 0.0001 auto_hpo_state: POSSIBLE +<<<<<<< HEAD learning_rate_warmup_iters: default_value: 100 +======= +>>>>>>> releases/2.2.0 num_iters: default_value: 100 diff --git a/src/otx/tools/templates/detection/instance_segmentation/resnet50_maskrcnn/template.yaml b/src/otx/tools/templates/detection/instance_segmentation/resnet50_maskrcnn/template.yaml index bd2248adbcd..8420763ef39 100644 --- a/src/otx/tools/templates/detection/instance_segmentation/resnet50_maskrcnn/template.yaml +++ b/src/otx/tools/templates/detection/instance_segmentation/resnet50_maskrcnn/template.yaml @@ -27,8 +27,11 @@ hyper_parameters: learning_rate: default_value: 0.007 auto_hpo_state: POSSIBLE +<<<<<<< HEAD learning_rate_warmup_iters: default_value: 100 +======= +>>>>>>> releases/2.2.0 num_iters: 
default_value: 100 diff --git a/src/otx/tools/templates/detection/rotated_detection/configuration.yaml b/src/otx/tools/templates/detection/rotated_detection/configuration.yaml index b41ea7dda25..524376b9d0a 100644 --- a/src/otx/tools/templates/detection/rotated_detection/configuration.yaml +++ b/src/otx/tools/templates/detection/rotated_detection/configuration.yaml @@ -91,11 +91,11 @@ learning_parameters: num_iters: affects_outcome_of: TRAINING default_value: 200 - description: - Increasing this value causes the results to be more robust but training - time will be longer. + description: Maximum number of epochs to train a model. + Increasing this value may result in longer training, but potentially in a more robust model. + Note, if the early stopping is enabled, the actual number of epochs may be less than this value. editable: true - header: Number of training iterations + header: Number of training epochs max_value: 1000 min_value: 1 type: INTEGER @@ -173,25 +173,6 @@ learning_parameters: value: 10 visible_in_ui: true warning: This is applied exclusively when early stopping is enabled. - early_stop_iteration_patience: - affects_outcome_of: TRAINING - default_value: 0 - description: - Training will stop if the model does not improve within the number of iterations of patience. - This ensures the model is trained enough with the number of iterations of patience before early stopping. - editable: true - header: Iteration patience for early stopping - max_value: 1000 - min_value: 0 - type: INTEGER - ui_rules: - action: DISABLE_EDITING - operator: AND - rules: [] - type: UI_RULES - value: 0 - visible_in_ui: true - warning: This is applied exclusively when early stopping is enabled. use_adaptive_interval: affects_outcome_of: TRAINING default_value: true @@ -232,7 +213,7 @@ learning_parameters: auto_num_workers: affects_outcome_of: TRAINING default_value: false - description: Adapt num_workers according to current hardware status automatically. 
+ description: Adapt number of workers according to current hardware status automatically. editable: true header: Enable auto adaptive num_workers type: BOOLEAN diff --git a/src/otx/tools/templates/detection/rotated_detection/efficientnetb2b_maskrcnn/template.yaml b/src/otx/tools/templates/detection/rotated_detection/efficientnetb2b_maskrcnn/template.yaml index 21e079c489a..12db39ea30b 100644 --- a/src/otx/tools/templates/detection/rotated_detection/efficientnetb2b_maskrcnn/template.yaml +++ b/src/otx/tools/templates/detection/rotated_detection/efficientnetb2b_maskrcnn/template.yaml @@ -27,8 +27,11 @@ hyper_parameters: learning_rate: default_value: 0.007 auto_hpo_state: POSSIBLE +<<<<<<< HEAD learning_rate_warmup_iters: default_value: 100 +======= +>>>>>>> releases/2.2.0 num_iters: default_value: 100 diff --git a/src/otx/tools/templates/detection/rotated_detection/resnet50_maskrcnn/template.yaml b/src/otx/tools/templates/detection/rotated_detection/resnet50_maskrcnn/template.yaml index 4cb51f466eb..8626082dd50 100644 --- a/src/otx/tools/templates/detection/rotated_detection/resnet50_maskrcnn/template.yaml +++ b/src/otx/tools/templates/detection/rotated_detection/resnet50_maskrcnn/template.yaml @@ -27,8 +27,11 @@ hyper_parameters: learning_rate: default_value: 0.007 auto_hpo_state: POSSIBLE +<<<<<<< HEAD learning_rate_warmup_iters: default_value: 100 +======= +>>>>>>> releases/2.2.0 num_iters: default_value: 100 diff --git a/src/otx/tools/templates/keypoint_detection/configuration.yaml b/src/otx/tools/templates/keypoint_detection/configuration.yaml index e745d787c80..1ef84c01919 100644 --- a/src/otx/tools/templates/keypoint_detection/configuration.yaml +++ b/src/otx/tools/templates/keypoint_detection/configuration.yaml @@ -87,11 +87,11 @@ learning_parameters: num_iters: affects_outcome_of: TRAINING default_value: 200 - description: - Increasing this value causes the results to be more robust but training - time will be longer. 
+ description: Maximum number of epochs to train a model. + Increasing this value may result in longer training, but potentially in a more robust model. + Note, if the early stopping is enabled, the actual number of epochs may be less than this value. editable: true - header: Number of training iterations + header: Number of training epochs max_value: 1000 min_value: 1 type: INTEGER @@ -120,7 +120,7 @@ learning_parameters: operator: AND rules: [] type: UI_RULES - value: 0 + value: 2 visible_in_ui: true warning: null learning_rate_warmup_iters: @@ -189,25 +189,6 @@ learning_parameters: value: 10 visible_in_ui: true warning: This is applied exclusively when early stopping is enabled. - early_stop_iteration_patience: - affects_outcome_of: TRAINING - default_value: 0 - description: - Training will stop if the model does not improve within the number of iterations of patience. - This ensures the model is trained enough with the number of iterations of patience before early stopping. - editable: true - header: Iteration patience for early stopping - max_value: 1000 - min_value: 0 - type: INTEGER - ui_rules: - action: DISABLE_EDITING - operator: AND - rules: [] - type: UI_RULES - value: 0 - visible_in_ui: true - warning: This is applied exclusively when early stopping is enabled. use_adaptive_interval: affects_outcome_of: TRAINING default_value: true @@ -248,7 +229,7 @@ learning_parameters: auto_num_workers: affects_outcome_of: TRAINING default_value: false - description: Adapt num_workers according to current hardware status automatically. + description: Adapt number of workers according to current hardware status automatically. 
editable: true header: Enable auto adaptive num_workers type: BOOLEAN diff --git a/src/otx/tools/templates/segmentation/configuration.yaml b/src/otx/tools/templates/segmentation/configuration.yaml index 23356c696db..c7df2603ec7 100644 --- a/src/otx/tools/templates/segmentation/configuration.yaml +++ b/src/otx/tools/templates/segmentation/configuration.yaml @@ -54,8 +54,8 @@ learning_parameters: default_value: 100 description: In this periods of initial training iterations, the model will be trained in low learning rate, - which will be increased incrementally up to the expected learning rate setting. - This warm-up phase is known to be helpful to stabilize training, thus result in better performance. + which will be increased linearly up to the expected learning rate setting. + This warm-up phase is known to be helpful to stabilize training, therefore, can lead to increased performance. editable: true header: Number of iterations for learning rate warmup max_value: 10000 @@ -74,11 +74,11 @@ learning_parameters: auto_hpo_state: not_possible auto_hpo_value: null default_value: 200 - description: - Increasing this value causes the results to be more robust but training - time will be longer. + description: Maximum number of epochs to train a model. + Increasing this value may result in longer training, but potentially in a more robust model. + Note, if the early stopping is enabled, the actual number of epochs may be less than this value. 
editable: true - header: Number of training iterations + header: Number of training epochs max_value: 1000 min_value: 1 type: INTEGER @@ -109,7 +109,7 @@ learning_parameters: operator: AND rules: [] type: UI_RULES - value: 0 + value: 2 visible_in_ui: true warning: null enable_early_stopping: @@ -143,7 +143,7 @@ learning_parameters: visible_in_ui: false early_stop_patience: affects_outcome_of: TRAINING - default_value: 7 + default_value: 10 description: Training will stop if the model does not improve within the number of epochs of patience. editable: true header: Patience for early stopping @@ -158,25 +158,6 @@ learning_parameters: value: 5 visible_in_ui: true warning: This is applied exclusively when early stopping is enabled. - early_stop_iteration_patience: - affects_outcome_of: TRAINING - default_value: 0 - description: - Training will stop if the model does not improve within the number of iterations of patience. - This ensures the model is trained enough with the number of iterations of patience before early stopping. - editable: true - header: Iteration patience for early stopping - max_value: 1000 - min_value: 0 - type: INTEGER - ui_rules: - action: DISABLE_EDITING - operator: AND - rules: [] - type: UI_RULES - value: 0 - visible_in_ui: true - warning: This is applied exclusively when early stopping is enabled. enable_supcon: affects_outcome_of: TRAINING default_value: false @@ -219,7 +200,7 @@ learning_parameters: auto_num_workers: affects_outcome_of: TRAINING default_value: false - description: Adapt num_workers according to current hardware status automatically. + description: Adapt number of workers according to current hardware status automatically. 
editable: true header: Enable auto adaptive num_workers type: BOOLEAN diff --git a/src/otx/tools/templates/segmentation/dinov2_small/template.yaml b/src/otx/tools/templates/segmentation/dinov2_small/template.yaml index ac837fa5007..630af3c6b7c 100644 --- a/src/otx/tools/templates/segmentation/dinov2_small/template.yaml +++ b/src/otx/tools/templates/segmentation/dinov2_small/template.yaml @@ -25,10 +25,6 @@ hyper_parameters: learning_rate: default_value: 0.001 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 - num_iters: - default_value: 200 # Training resources. max_nodes: 1 diff --git a/src/otx/tools/templates/segmentation/ham_segnext_b/template.yaml b/src/otx/tools/templates/segmentation/ham_segnext_b/template.yaml index ef390639238..88f3d5d41a3 100644 --- a/src/otx/tools/templates/segmentation/ham_segnext_b/template.yaml +++ b/src/otx/tools/templates/segmentation/ham_segnext_b/template.yaml @@ -25,14 +25,8 @@ hyper_parameters: learning_rate: default_value: 0.00006 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 - num_iters: - default_value: 200 early_stop_start: default_value: 100 - early_stop_patience: - default_value: 10 # Training resources. max_nodes: 1 diff --git a/src/otx/tools/templates/segmentation/ham_segnext_s/template.yaml b/src/otx/tools/templates/segmentation/ham_segnext_s/template.yaml index 9afd2660cf2..0c13b203b84 100644 --- a/src/otx/tools/templates/segmentation/ham_segnext_s/template.yaml +++ b/src/otx/tools/templates/segmentation/ham_segnext_s/template.yaml @@ -25,14 +25,8 @@ hyper_parameters: learning_rate: default_value: 0.00006 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 - num_iters: - default_value: 200 early_stop_start: default_value: 100 - early_stop_patience: - default_value: 10 # Training resources. 
max_nodes: 1 diff --git a/src/otx/tools/templates/segmentation/ham_segnext_t/template.yaml b/src/otx/tools/templates/segmentation/ham_segnext_t/template.yaml index c5879535caa..e23c8c91104 100644 --- a/src/otx/tools/templates/segmentation/ham_segnext_t/template.yaml +++ b/src/otx/tools/templates/segmentation/ham_segnext_t/template.yaml @@ -25,14 +25,8 @@ hyper_parameters: learning_rate: default_value: 0.00006 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 - num_iters: - default_value: 200 early_stop_start: default_value: 100 - early_stop_patience: - default_value: 10 # Training resources. max_nodes: 1 diff --git a/src/otx/tools/templates/segmentation/ocr_lite_hrnet_18_mod2/template.yaml b/src/otx/tools/templates/segmentation/ocr_lite_hrnet_18_mod2/template.yaml index 56af9f4b2b2..ce4e4da393f 100644 --- a/src/otx/tools/templates/segmentation/ocr_lite_hrnet_18_mod2/template.yaml +++ b/src/otx/tools/templates/segmentation/ocr_lite_hrnet_18_mod2/template.yaml @@ -24,10 +24,6 @@ hyper_parameters: learning_rate: default_value: 0.001 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 - num_iters: - default_value: 200 # Training resources. max_nodes: 1 diff --git a/src/otx/tools/templates/segmentation/ocr_lite_hrnet_s_mod2/template.yaml b/src/otx/tools/templates/segmentation/ocr_lite_hrnet_s_mod2/template.yaml index 6637673c64e..6c8be01af2a 100644 --- a/src/otx/tools/templates/segmentation/ocr_lite_hrnet_s_mod2/template.yaml +++ b/src/otx/tools/templates/segmentation/ocr_lite_hrnet_s_mod2/template.yaml @@ -25,10 +25,6 @@ hyper_parameters: learning_rate: default_value: 0.001 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 - num_iters: - default_value: 200 # Training resources. 
max_nodes: 1 diff --git a/src/otx/tools/templates/segmentation/ocr_lite_hrnet_x_mod3/template.yaml b/src/otx/tools/templates/segmentation/ocr_lite_hrnet_x_mod3/template.yaml index f4bc011554f..d4f98812586 100644 --- a/src/otx/tools/templates/segmentation/ocr_lite_hrnet_x_mod3/template.yaml +++ b/src/otx/tools/templates/segmentation/ocr_lite_hrnet_x_mod3/template.yaml @@ -25,10 +25,6 @@ hyper_parameters: learning_rate: default_value: 0.001 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 - num_iters: - default_value: 200 # Training resources. max_nodes: 1 diff --git a/tests/conftest.py b/tests/conftest.py index 643bf351be8..62ca433376d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -373,6 +373,7 @@ def fxt_seg_label_info() -> SegLabelInfo: label_names, ["class2", "class3"], ], + label_ids=["0", "1", "2"], ) @@ -385,6 +386,7 @@ def fxt_multiclass_labelinfo() -> LabelInfo: label_names, ["class2", "class3"], ], + label_ids=["0", "1", "2"], ) @@ -398,6 +400,7 @@ def fxt_multilabel_labelinfo() -> LabelInfo: [label_names[1]], [label_names[2]], ], + label_ids=["0", "1", "2"], ) @@ -464,6 +467,7 @@ def fxt_hlabel_multilabel_info() -> HLabelInfo: ["Spade_A", "Spade"], ["Spade_King", "Spade"], ], + label_ids=[str(i) for i in range(9)], ) diff --git a/tests/unit/algo/classification/conftest.py b/tests/unit/algo/classification/conftest.py index 945c3d0bc4c..a283eff41b1 100644 --- a/tests/unit/algo/classification/conftest.py +++ b/tests/unit/algo/classification/conftest.py @@ -31,6 +31,7 @@ def fxt_hlabel_data() -> HLabelInfo: ["Heart_Queen", "Heart_King"], ["Spade_A", "Spade_King"], ], + label_ids=[str(i) for i in range(6)], num_multiclass_heads=3, num_multilabel_classes=0, head_idx_to_logits_range={"0": (0, 2), "1": (2, 4), "2": (4, 6)}, @@ -80,6 +81,7 @@ def fxt_hlabel_multilabel_info() -> HLabelInfo: "Red_Joker", "Extra_Joker", ], + label_ids=[str(i) for i in range(9)], label_groups=[ ["Heart", "Spade"], ["Heart_Queen", "Heart_King"], @@ 
-149,6 +151,7 @@ def fxt_hlabel_cifar() -> HLabelInfo: "aquatic_mammals", "fish", ], + label_ids=[str(i) for i in range(12)], label_groups=[ ["beaver", "dolphin", "otter", "seal", "whale"], ["aquarium_fish", "flatfish", "ray", "shark", "trout"], diff --git a/tests/unit/algo/detection/detectors/test_single_stage_detector.py b/tests/unit/algo/detection/detectors/test_single_stage_detector.py index 7d805062ed1..669e4d8c900 100644 --- a/tests/unit/algo/detection/detectors/test_single_stage_detector.py +++ b/tests/unit/algo/detection/detectors/test_single_stage_detector.py @@ -53,7 +53,7 @@ def batch(self): inputs = torch.randn(1, 3, 32, 32) return DetBatchDataEntity( batch_size=1, - imgs_info=[LabelInfo(["a"], [["a"]])], + imgs_info=[LabelInfo(["a"], ["0"], [["a"]])], images=inputs, bboxes=[torch.tensor([[0.5, 0.5, 0.5, 0.5]])], labels=[torch.tensor([0])], diff --git a/tests/unit/algo/detection/test_rtdetr.py b/tests/unit/algo/detection/test_rtdetr.py index d96305dfe13..6d2c92718a7 100644 --- a/tests/unit/algo/detection/test_rtdetr.py +++ b/tests/unit/algo/detection/test_rtdetr.py @@ -15,7 +15,7 @@ class TestRTDETR: def test_customize_outputs(self, mocker): - label_info = LabelInfo(["a", "b", "c"], [["a", "b", "c"]]) + label_info = LabelInfo(["a", "b", "c"], ["0", "1", "2"], [["a", "b", "c"]]) mocker.patch("otx.algo.detection.rtdetr.RTDETR._build_model", return_value=mocker.MagicMock()) model = RTDETR(model_name="rtdetr_18", label_info=label_info) model.model.load_from = None diff --git a/tests/unit/algo/segmentation/backbones/test_dinov2.py b/tests/unit/algo/segmentation/backbones/test_dinov2.py deleted file mode 100644 index 45fb2aaf84e..00000000000 --- a/tests/unit/algo/segmentation/backbones/test_dinov2.py +++ /dev/null @@ -1,82 +0,0 @@ -from __future__ import annotations - -from pathlib import Path -from unittest.mock import MagicMock - -import pytest -import torch -from otx.algo.segmentation.backbones import dinov2 as target_file -from 
otx.algo.segmentation.backbones.dinov2 import DinoVisionTransformer - - -class TestDinoVisionTransformer: - @pytest.fixture() - def mock_backbone_named_parameters(self) -> dict[str, MagicMock]: - named_parameter = {} - for i in range(3): - parameter = MagicMock() - parameter.requires_grad = True - named_parameter[f"layer_{i}"] = parameter - return named_parameter - - @pytest.fixture() - def mock_backbone(self, mock_backbone_named_parameters) -> MagicMock: - backbone = MagicMock() - backbone.named_parameters.return_value = list(mock_backbone_named_parameters.items()) - return backbone - - @pytest.fixture(autouse=True) - def mock_torch_hub_load(self, mocker, mock_backbone): - return mocker.patch("otx.algo.segmentation.backbones.dinov2.torch.hub.load", return_value=mock_backbone) - - def test_init(self, mock_backbone, mock_backbone_named_parameters): - dino = DinoVisionTransformer(model_name="dinov2_vits14", freeze_backbone=True, out_index=[8, 9, 10, 11]) - - assert dino.backbone == mock_backbone - for parameter in mock_backbone_named_parameters.values(): - assert parameter.requires_grad is False - - @pytest.fixture() - def dino_vit(self) -> DinoVisionTransformer: - return DinoVisionTransformer( - model_name="dinov2_vits14", - freeze_backbone=True, - out_index=[8, 9, 10, 11], - ) - - def test_forward(self, dino_vit, mock_backbone): - tensor = torch.rand(10, 3, 3, 3) - dino_vit.forward(tensor) - - mock_backbone.assert_called_once_with(tensor) - - @pytest.fixture() - def mock_load_from_http(self, mocker) -> MagicMock: - return mocker.patch.object(target_file, "load_from_http") - - @pytest.fixture() - def mock_load_checkpoint_to_model(self, mocker) -> MagicMock: - return mocker.patch.object(target_file, "load_checkpoint_to_model") - - @pytest.fixture() - def pretrained_weight(self, tmp_path) -> str: - weight = tmp_path / "pretrained.pth" - weight.touch() - return str(weight) - - @pytest.fixture() - def mock_torch_load(self, mocker) -> MagicMock: - return 
mocker.patch("otx.algo.segmentation.backbones.mscan.torch.load") - - def test_load_pretrained_weights(self, dino_vit, pretrained_weight, mock_torch_load, mock_load_checkpoint_to_model): - dino_vit.load_pretrained_weights(pretrained=pretrained_weight) - mock_torch_load.assert_called_once_with(pretrained_weight, "cpu") - mock_load_checkpoint_to_model.assert_called_once() - - def test_load_pretrained_weights_from_url(self, dino_vit, mock_load_from_http, mock_load_checkpoint_to_model): - pretrained_weight = "www.fake.com/fake.pth" - dino_vit.load_pretrained_weights(pretrained=pretrained_weight) - - cache_dir = Path.home() / ".cache" / "torch" / "hub" / "checkpoints" - mock_load_from_http.assert_called_once_with(filename=pretrained_weight, map_location="cpu", model_dir=cache_dir) - mock_load_checkpoint_to_model.assert_called_once() diff --git a/tests/unit/algo/segmentation/test_dino_v2_seg.py b/tests/unit/algo/segmentation/test_dino_v2_seg.py index e722254e47f..c9ad5a93946 100644 --- a/tests/unit/algo/segmentation/test_dino_v2_seg.py +++ b/tests/unit/algo/segmentation/test_dino_v2_seg.py @@ -10,9 +10,9 @@ class TestDinoV2Seg: - @pytest.fixture(scope="class") + @pytest.fixture() def fxt_dino_v2_seg(self) -> DinoV2Seg: - return DinoV2Seg(label_info=10, model_name="dinov2_vits14", input_size=(560, 560)) + return DinoV2Seg(label_info=10, model_name="dinov2-small-seg", input_size=(518, 518)) def test_dino_v2_seg_init(self, fxt_dino_v2_seg): assert isinstance(fxt_dino_v2_seg, DinoV2Seg) @@ -21,7 +21,7 @@ def test_dino_v2_seg_init(self, fxt_dino_v2_seg): def test_exporter(self, fxt_dino_v2_seg): exporter = fxt_dino_v2_seg._exporter assert isinstance(exporter, OTXModelExporter) - assert exporter.input_size == (1, 3, 560, 560) + assert exporter.input_size == (1, 3, 518, 518) def test_optimization_config(self, fxt_dino_v2_seg): config = fxt_dino_v2_seg._optimization_config @@ -32,7 +32,7 @@ def test_optimization_config(self, fxt_dino_v2_seg): @pytest.mark.parametrize( "model", [ 
- DinoV2Seg(model_name="dinov2_vits14", label_info=3), + DinoV2Seg(model_name="dinov2-small-seg", label_info=3, input_size=(518, 518)), ], ) def test_compiled_model(self, model): @@ -44,6 +44,6 @@ def test_compiled_model(self, model): model.model = torch.compile(model.model, backend=cnt) # Prepare inputs - x = torch.randn(1, 3, 560, 560) + x = torch.randn(1, 3, 518, 518) model.model(x) assert cnt.frame_count == 1 diff --git a/tests/unit/algo/visual_prompting/test_sam.py b/tests/unit/algo/visual_prompting/test_sam.py index 33bf8f6df39..4091ab9db01 100644 --- a/tests/unit/algo/visual_prompting/test_sam.py +++ b/tests/unit/algo/visual_prompting/test_sam.py @@ -139,6 +139,43 @@ def test_forward_for_tracing(self, mocker) -> None: ori_shape=ori_shape, ) +<<<<<<< HEAD +======= + +class TestSAM: + @pytest.fixture() + def sam(self) -> SAM: + return SAM(backbone_type="tiny_vit") + + def test_initialization(self, mocker) -> None: + mock_freeze_networks = mocker.patch.object(CommonSettingMixin, "freeze_networks") + mock_load_checkpoint = mocker.patch.object(CommonSettingMixin, "load_checkpoint") + + sam = SAM(backbone_type="tiny_vit") + + assert sam.backbone_type == "tiny_vit" + assert sam.image_size == 1024 + assert sam.image_embedding_size == 64 + assert sam.use_stability_score is False + assert sam.return_single_mask is True + assert sam.return_extra_metrics is False + assert sam.stability_score_offset == 1.0 + + mock_load_checkpoint.assert_called_once_with(load_from=sam.load_from["tiny_vit"]) + mock_freeze_networks.assert_called_once_with(True, True, False) + + def test_build_model(self, sam: SAM) -> None: + segment_anything = sam._build_model() + assert segment_anything is not None + assert isinstance(segment_anything, torch.nn.Module) + assert segment_anything.__class__.__name__ == "SegmentAnything" + + assert isinstance(segment_anything.image_encoder, TinyViT) + assert isinstance(segment_anything.prompt_encoder, SAMPromptEncoder) + assert 
isinstance(segment_anything.mask_decoder, SAMMaskDecoder) + assert isinstance(segment_anything.criterion, SAMCriterion) + +>>>>>>> releases/2.2.0 class TestSAM: @pytest.fixture() diff --git a/tests/unit/cli/test_cli.py b/tests/unit/cli/test_cli.py index 3b2501066ce..07aa5d083e2 100644 --- a/tests/unit/cli/test_cli.py +++ b/tests/unit/cli/test_cli.py @@ -188,7 +188,7 @@ def test_print_config_scheduler_override_command(self, fxt_print_config_schedule scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 monitor: val/test_f1 warmup_interval: step main_scheduler_callable: diff --git a/tests/unit/core/data/conftest.py b/tests/unit/core/data/conftest.py index 665bc5a7471..e2821b17281 100644 --- a/tests/unit/core/data/conftest.py +++ b/tests/unit/core/data/conftest.py @@ -193,6 +193,7 @@ def fxt_mock_hlabelinfo(): return HLabelInfo( label_names=_LABEL_NAMES, label_groups=[["Non-Rigid", "Rigid"], ["Rectangle", "Triangle"], ["Circle"], ["Lion"], ["Panda"]], + label_ids=_LABEL_NAMES, num_multiclass_heads=2, num_multilabel_classes=3, head_idx_to_logits_range={"0": (0, 2), "1": (2, 4)}, diff --git a/tests/unit/core/data/test_factory.py b/tests/unit/core/data/test_factory.py index 9877739862b..a3a6f92d4af 100644 --- a/tests/unit/core/data/test_factory.py +++ b/tests/unit/core/data/test_factory.py @@ -102,6 +102,7 @@ def test_create( cfg_subset=cfg_subset, vpm_config=vpm_config, image_color_channel=image_color_channel, + data_format="", ), dataset_cls, ) diff --git a/tests/unit/core/data/test_tiling.py b/tests/unit/core/data/test_tiling.py index ba232509dd7..e3de0f66baa 100644 --- a/tests/unit/core/data/test_tiling.py +++ b/tests/unit/core/data/test_tiling.py @@ -21,7 +21,7 @@ from otx.algo.instance_segmentation.maskrcnn import MaskRCNN from otx.algo.segmentation.litehrnet import LiteHRNet from otx.core.config.data import ( - SubsetConfig, + SamplerConfig, TileConfig, VisualPromptingConfig, ) @@ -33,7 
+33,6 @@ from otx.core.data.module import OTXDataModule from otx.core.model.detection import OTXDetectionModel from otx.core.types.task import OTXTaskType -from otx.core.types.transformer_libs import TransformLibType from torchvision import tv_tensors from tests.test_helpers import generate_random_bboxes @@ -45,6 +44,7 @@ def mock_otx_det_model(self) -> OTXDetectionModel: return create_autospec(OTXDetectionModel) @pytest.fixture() +<<<<<<< HEAD def fxt_data_roots(self) -> dict[OTXTaskType, Path]: parent_root = Path(__file__).parent.parent.parent.parent / "assets" return { @@ -122,6 +122,28 @@ def fxt_data_config(self, fxt_data_roots) -> dict[dict]: "tile_config": TileConfig(), "vpm_config": VisualPromptingConfig(), }, +======= + def fxt_det_transform_config(self) -> DictConfig: + config = OmegaConf.load("src/otx/recipe/_base_/data/detection_tile.yaml") + config.train_subset.input_size = config.input_size + config.val_subset.input_size = config.input_size + config.test_subset.input_size = config.input_size + config.train_subset.sampler = SamplerConfig(**config.train_subset.sampler) + return config + + @pytest.fixture() + def fxt_det_data_config(self, fxt_det_transform_config) -> dict: + data_root = Path(__file__).parent.parent.parent.parent / "assets" / "car_tree_bug" + + return { + "data_format": "coco_instances", + "data_root": data_root, + "train_subset": fxt_det_transform_config.train_subset, + "val_subset": fxt_det_transform_config.val_subset, + "test_subset": fxt_det_transform_config.test_subset, + "tile_config": TileConfig(), + "vpm_config": VisualPromptingConfig(), +>>>>>>> releases/2.2.0 } def det_dummy_forward(self, x: DetBatchDataEntity) -> DetBatchPredEntity: @@ -461,6 +483,7 @@ def test_explain_det_tile_merge(self, fxt_data_config): assert prediction.saliency_map[0].ndim == 3 self.explain_mode = False +<<<<<<< HEAD def test_instseg_tile_merge(self, fxt_data_config): data_config = fxt_data_config[OTXTaskType.INSTANCE_SEGMENTATION] model = 
MaskRCNN(label_info=3, model_name="maskrcnn_efficientnet_b2b", input_size=(256, 256)) @@ -471,6 +494,15 @@ def test_instseg_tile_merge(self, fxt_data_config): tile_datamodule = OTXDataModule( task=OTXTaskType.INSTANCE_SEGMENTATION, **data_config, +======= + def test_instseg_tile_merge(self, fxt_det_data_config): + model = MaskRCNNEfficientNet(label_info=3) + # Enable tile adapter + fxt_det_data_config["tile_config"] = TileConfig(enable_tiler=True) + tile_datamodule = OTXDataModule( + task=OTXTaskType.INSTANCE_SEGMENTATION, + **fxt_det_data_config, +>>>>>>> releases/2.2.0 ) self.explain_mode = False @@ -480,6 +512,7 @@ def test_instseg_tile_merge(self, fxt_data_config): for batch in tile_datamodule.val_dataloader(): model.forward_tiles(batch) +<<<<<<< HEAD def test_explain_instseg_tile_merge(self, fxt_data_config): data_config = fxt_data_config[OTXTaskType.INSTANCE_SEGMENTATION] model = MaskRCNN(label_info=3, model_name="maskrcnn_efficientnet_b2b", input_size=(256, 256)) @@ -490,6 +523,15 @@ def test_explain_instseg_tile_merge(self, fxt_data_config): tile_datamodule = OTXDataModule( task=OTXTaskType.INSTANCE_SEGMENTATION, **data_config, +======= + def test_explain_instseg_tile_merge(self, fxt_det_data_config): + model = MaskRCNNEfficientNet(label_info=3) + # Enable tile adapter + fxt_det_data_config["tile_config"] = TileConfig(enable_tiler=True, enable_adaptive_tiling=False) + tile_datamodule = OTXDataModule( + task=OTXTaskType.INSTANCE_SEGMENTATION, + **fxt_det_data_config, +>>>>>>> releases/2.2.0 ) self.explain_mode = model.explain_mode = True diff --git a/tests/unit/core/data/transform_libs/test_torchvision.py b/tests/unit/core/data/transform_libs/test_torchvision.py index 3aad061118e..0670197bb7a 100644 --- a/tests/unit/core/data/transform_libs/test_torchvision.py +++ b/tests/unit/core/data/transform_libs/test_torchvision.py @@ -22,7 +22,10 @@ CachedMixUp, CachedMosaic, Compose, +<<<<<<< HEAD Decode3DInputsAffineTransforms, +======= +>>>>>>> releases/2.2.0 
DecodeVideo, FilterAnnotations, GetBBoxCenterScale, diff --git a/tests/unit/core/metrics/test_accuracy.py b/tests/unit/core/metrics/test_accuracy.py index d3c43a8a087..33040fa881d 100644 --- a/tests/unit/core/metrics/test_accuracy.py +++ b/tests/unit/core/metrics/test_accuracy.py @@ -52,7 +52,11 @@ def test_default_multi_class_cls_metric_callable(self, fxt_multiclass_labelinfo: metric = MultiClassClsMetricCallable(fxt_multiclass_labelinfo) assert isinstance(metric.accuracy, MulticlassAccuracy) +<<<<<<< HEAD one_class_label_info = LabelInfo(label_names=["class1"], label_groups=[["class1"]]) +======= + one_class_label_info = LabelInfo(label_names=["class1"], label_groups=[["class1"]], label_ids=["0"]) +>>>>>>> releases/2.2.0 assert one_class_label_info.num_classes == 1 binary_metric = MultiClassClsMetricCallable(one_class_label_info) assert isinstance(binary_metric.accuracy, BinaryAccuracy) diff --git a/tests/unit/core/model/test_segmentation.py b/tests/unit/core/model/test_segmentation.py index b7181ce87cc..9f81fda86ab 100644 --- a/tests/unit/core/model/test_segmentation.py +++ b/tests/unit/core/model/test_segmentation.py @@ -34,6 +34,7 @@ def label_info(self): return SegLabelInfo( label_names=["Background", "label_0", "label_1"], label_groups=[["Background", "label_0", "label_1"]], + label_ids=["0", "1", "2"], ) @pytest.fixture() @@ -64,8 +65,16 @@ def test_export_parameters(self, model): ("label_info", "expected_label_info"), [ ( - SegLabelInfo(label_names=["label1", "label2", "label3"], label_groups=[["label1", "label2", "label3"]]), - SegLabelInfo(label_names=["label1", "label2", "label3"], label_groups=[["label1", "label2", "label3"]]), + SegLabelInfo( + label_names=["label1", "label2", "label3"], + label_groups=[["label1", "label2", "label3"]], + label_ids=["0", "1", "2"], + ), + SegLabelInfo( + label_names=["label1", "label2", "label3"], + label_groups=[["label1", "label2", "label3"]], + label_ids=["0", "1", "2"], + ), ), 
(SegLabelInfo.from_num_classes(num_classes=5), SegLabelInfo.from_num_classes(num_classes=5)), ], diff --git a/tests/unit/core/types/test_export.py b/tests/unit/core/types/test_export.py index 72add6f3c31..70a4aa1aa2f 100644 --- a/tests/unit/core/types/test_export.py +++ b/tests/unit/core/types/test_export.py @@ -52,3 +52,4 @@ def test_wrap(fxt_label_info, task_type): assert ("model_info", "tile_size") in metadata assert ("model_info", "tiles_overlap") in metadata assert ("model_info", "max_pred_number") in metadata + assert ("model_info", "otx_version") in metadata diff --git a/tests/unit/core/types/test_label.py b/tests/unit/core/types/test_label.py index 3ae1ae1f463..c557bc8f7f3 100644 --- a/tests/unit/core/types/test_label.py +++ b/tests/unit/core/types/test_label.py @@ -4,7 +4,11 @@ from datumaro import LabelCategories from datumaro.components.annotation import GroupType +<<<<<<< HEAD from otx.core.types.label import HLabelInfo, NullLabelInfo, SegLabelInfo +======= +from otx.core.types.label import HLabelInfo, LabelInfo, NullLabelInfo, SegLabelInfo +>>>>>>> releases/2.2.0 def test_as_json(fxt_label_info): @@ -13,17 +17,47 @@ def test_as_json(fxt_label_info): assert fxt_label_info == deserialized +def test_label_info_from_arrow(): + labels = [ + LabelCategories.Category(name="car", attributes={"__name__car"}), + LabelCategories.Category(name="truck", attributes={"__name__truck"}), + ] + label_groups = [ + LabelCategories.LabelGroup( + name="Detection labels___vehicle", + labels=["car", "truck"], + group_type=GroupType.EXCLUSIVE, + ), + ] + dm_label_categories = LabelCategories(items=labels, label_groups=label_groups) + + label_info = LabelInfo.from_dm_label_groups_arrow(dm_label_categories) + assert len(label_info.label_names) == 2 + assert len(label_info.label_names) == 2 + assert len(label_info.label_groups[0]) == 2 + assert "car" in label_info.label_names + assert "truck" in label_info.label_names + + def test_seg_label_info(): # Automatically insert 
background label at zero index assert SegLabelInfo.from_num_classes(3) == SegLabelInfo( ["label_0", "label_1", "label_2"], + ["0", "1", "2"], [["label_0", "label_1", "label_2"]], ) - assert SegLabelInfo.from_num_classes(1) == SegLabelInfo(["background", "label_0"], [["background", "label_0"]]) + assert SegLabelInfo.from_num_classes(1) == SegLabelInfo( + ["background", "label_0"], + ["0", "1"], + [["background", "label_0"]], + ) assert SegLabelInfo.from_num_classes(0) == NullLabelInfo() +<<<<<<< HEAD # Unit test +======= +>>>>>>> releases/2.2.0 def test_hlabel_info(): labels = [ LabelCategories.Category(name="car", parent="vehicle"), @@ -52,3 +86,41 @@ def test_hlabel_info(): assert list(hlabel_info.class_to_group_idx.keys()) == list( hlabel_info.label_to_idx.keys(), ), "class_to_group_idx and label_to_idx keys do not match" +<<<<<<< HEAD +======= + + +def test_hlabel_info_arrow(): + labels = [ + LabelCategories.Category(name="car", parent="vehicle", attributes={"__name__car"}), + LabelCategories.Category(name="truck", parent="vehicle", attributes={"__name__truck"}), + LabelCategories.Category(name="plush_toy", parent="plush toy", attributes={"__name__plush toy"}), + LabelCategories.Category(name="No class", attributes={"__name__No class"}), + ] + label_groups = [ + LabelCategories.LabelGroup( + name="Detection labels___vehicle", + labels=["car", "truck"], + group_type=GroupType.EXCLUSIVE, + ), + LabelCategories.LabelGroup( + name="Detection labels___plush toy", + labels=["plush toy"], + group_type=GroupType.EXCLUSIVE, + ), + LabelCategories.LabelGroup(name="No class", labels=["No class"], group_type=GroupType.RESTRICTED), + ] + dm_label_categories = LabelCategories(items=labels, label_groups=label_groups) + + hlabel_info = HLabelInfo.from_dm_label_groups_arrow(dm_label_categories) + + # Check if class_to_group_idx and label_to_idx have the same keys + assert list(hlabel_info.class_to_group_idx.keys()) == list( + hlabel_info.label_to_idx.keys(), + ), 
"class_to_group_idx and label_to_idx keys do not match" + + assert len(hlabel_info.label_names) == 3 + assert "No class" not in hlabel_info.label_names + for label in ["car", "truck", "plush toy"]: + assert label in hlabel_info.label_names +>>>>>>> releases/2.2.0 diff --git a/tests/unit/engine/utils/test_auto_configurator.py b/tests/unit/engine/utils/test_auto_configurator.py index f9ad33b0761..681b1b24639 100644 --- a/tests/unit/engine/utils/test_auto_configurator.py +++ b/tests/unit/engine/utils/test_auto_configurator.py @@ -131,9 +131,9 @@ def test_get_model(self, fxt_task: OTXTaskType) -> None: # With label_info label_names = ["class1", "class2", "class3"] label_info = ( - LabelInfo(label_names=label_names, label_groups=[label_names]) + LabelInfo(label_names=label_names, label_groups=[label_names], label_ids=label_names) if fxt_task != OTXTaskType.SEMANTIC_SEGMENTATION - else SegLabelInfo(label_names=label_names, label_groups=[label_names]) + else SegLabelInfo(label_names=label_names, label_groups=[label_names], label_ids=label_names) ) model = auto_configurator.get_model(label_info=label_info) assert isinstance(model, OTXModel) @@ -147,7 +147,7 @@ def test_get_model(self, fxt_task: OTXTaskType) -> None: def test_get_model_set_input_size(self) -> None: auto_configurator = AutoConfigurator(task=OTXTaskType.MULTI_CLASS_CLS) label_names = ["class1", "class2", "class3"] - label_info = LabelInfo(label_names=label_names, label_groups=[label_names]) + label_info = LabelInfo(label_names=label_names, label_groups=[label_names], label_ids=label_names) input_size = 300 model = auto_configurator.get_model(label_info=label_info, input_size=input_size) diff --git a/tests/unit/tools/test_converter.py b/tests/unit/tools/test_converter.py index 711b92b6bd4..eb35d890a4a 100644 --- a/tests/unit/tools/test_converter.py +++ b/tests/unit/tools/test_converter.py @@ -19,7 +19,7 @@ def test_convert(self): assert config["data"]["train_subset"]["num_workers"] == 8 assert 
config["data"]["val_subset"]["num_workers"] == 8 assert config["data"]["test_subset"]["num_workers"] == 8 - assert config["callbacks"][0]["init_args"]["patience"] == 10 + assert config["callbacks"][0]["init_args"]["patience"] == 4 assert config["data"]["tile_config"]["enable_tiler"] is True assert config["data"]["tile_config"]["overlap"] == 0.5 @@ -57,6 +57,6 @@ def test_instantiate(self, tmp_path): assert engine.datamodule.tile_config.enable_tiler assert len(train_kwargs["callbacks"]) == len(config["callbacks"]) - assert train_kwargs["callbacks"][0].patience == 10 + assert train_kwargs["callbacks"][0].patience == 4 assert len(train_kwargs["logger"]) == len(config["logger"]) assert train_kwargs["max_epochs"] == 50 diff --git a/tox.ini b/tox.ini index 7aa0fa1ad5b..c4ca765e6c8 100644 --- a/tox.ini +++ b/tox.ini @@ -56,7 +56,11 @@ commands = {posargs} +<<<<<<< HEAD [testenv:integration-test-{all, action, classification, multi_cls_classification, multi_label_classification, hlabel_classification, detection, rotated_detection, keypoint_detection, instance_segmentation, semantic_segmentation, visual_prompting_all, visual_prompting, zero_shot_visual_prompting, anomaly, anomaly_classification, anomaly_detection, anomaly_segmentation, object_detection_3d}] +======= +[testenv:integration-test-{all, action, classification, multi_cls_classification, multi_label_classification, hlabel_classification, detection, rotated_detection, keypoint_detection, instance_segmentation, semantic_segmentation, visual_prompting_all, visual_prompting, zero_shot_visual_prompting, anomaly, anomaly_classification, anomaly_detection, anomaly_segmentation}] +>>>>>>> releases/2.2.0 setenv = CUBLAS_WORKSPACE_CONFIG=:4096:8 deps =