diff --git a/src/otx/algo/detection/heads/dfine_decoder.py b/src/otx/algo/detection/heads/dfine_decoder.py index 782756f0e8..1a04d2eb83 100644 --- a/src/otx/algo/detection/heads/dfine_decoder.py +++ b/src/otx/algo/detection/heads/dfine_decoder.py @@ -334,15 +334,15 @@ def forward( """Forward function of the Transformer Decoder Layer. Args: - target (Tensor): _description_ - reference_points (Tensor): _description_ - value (Tensor): _description_ - spatial_shapes (list[list[int]]): _description_ - attn_mask (Tensor | None, optional): _description_. Defaults to None. - query_pos_embed (Tensor | None, optional): _description_. Defaults to None. + target (Tensor): target feature tensor. + reference_points (Tensor): reference points tensor. + value (Tensor): value tensor. + spatial_shapes (list[list[int]]): spatial shapes of the value tensor. + attn_mask (Tensor | None, optional): attention mask. Defaults to None. + query_pos_embed (Tensor | None, optional): query positional embedding. Defaults to None. Returns: - Tensor: _description_ + Tensor: updated target tensor. """ # self attention q = k = self.with_pos_embed(target, query_pos_embed) @@ -1133,20 +1133,20 @@ class DFINETransformer: "hidden_dim": 128, "dim_feedforward": 512, "num_levels": 2, - "num_layers": 3, + "num_decoder_layers": 3, "eval_idx": -1, "num_points_list": [6, 6], "eval_spatial_size": [640, 640], }, "dfine_hgnetv2_s": { "feat_channels": [256, 256, 256], - "num_layers": 3, + "num_decoder_layers": 3, "eval_idx": -1, "eval_spatial_size": [640, 640], "num_points_list": [3, 6, 3], }, "dfine_hgnetv2_m": { - "num_layers": 4, + "num_decoder_layers": 4, "eval_idx": -1, "eval_spatial_size": [640, 640], }, diff --git a/src/otx/algo/detection/necks/dfine_hybrid_encoder.py b/src/otx/algo/detection/necks/dfine_hybrid_encoder.py index b5a99b7c72..666d0c695d 100644 --- a/src/otx/algo/detection/necks/dfine_hybrid_encoder.py +++ b/src/otx/algo/detection/necks/dfine_hybrid_encoder.py @@ -78,9 +78,9 @@ class RepNCSPELAN4(nn.Module): c3 (int): c3 channel size. Refer to GELAN paper. c4 (int): c4 channel size. Refer to GELAN paper. n (int, optional): number of blocks. Defaults to 3. - bias (bool, optional): _description_. Defaults to False. - activation (Callable[..., nn.Module] | None, optional): _description_. Defaults to None. - normalization (Callable[..., nn.Module] | None, optional): _description_. Defaults to None. + bias (bool, optional): use bias. Defaults to False. + activation (Callable[..., nn.Module] | None, optional): activation function. Defaults to None. + normalization (Callable[..., nn.Module] | None, optional): norm layer. Defaults to None. """ def __init__(