Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix early stopping in converter patching + fix lr warmup for all tasks #4131

Merged
merged 14 commits into from
Dec 4, 2024
2 changes: 1 addition & 1 deletion src/otx/algo/callbacks/adaptive_early_stopping.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def __init__(
self,
monitor: str,
min_delta: float = 0.0,
patience: int = 3,
patience: int = 10,
verbose: bool = False,
mode: str = "min",
strict: bool = True,
Expand Down
2 changes: 1 addition & 1 deletion src/otx/core/model/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -744,7 +744,7 @@
return super().lr_scheduler_step(scheduler=scheduler, metric=metric)

if len(warmup_schedulers) != 1:
msg = "No more than two warmup schedulers coexist."
msg = "No more than one warmup schedulers coexist."

Check warning on line 747 in src/otx/core/model/base.py

View check run for this annotation

Codecov / codecov/patch

src/otx/core/model/base.py#L747

Added line #L747 was not covered by tests
raise RuntimeError(msg)

warmup_scheduler = next(iter(warmup_schedulers))
Expand Down
11 changes: 6 additions & 5 deletions src/otx/core/schedulers/warmup_schedulers.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,17 @@ class LinearWarmupScheduler(LambdaLR):
"""Linear Warmup scheduler.

Args:
num_warmup_steps: Learning rate will linearly increased during the period same as this number.
warmup_interval: If "epoch", count the number of steps for the warmup period.
optimizer (Optimizer): Optimizer to apply the scheduler.
num_warmup_steps (int): Learning rate will be linearly increased during a period equal to this number.
interval (Literal["step", "epoch"]): If "epoch", count the number of epochs for the warmup period.
Otherwise, the iteration step will be the warmup period.
"""

def __init__(
self,
optimizer: Optimizer,
num_warmup_steps: int = 1000,
interval: Literal["step", "epoch"] = "step",
interval: Literal["step", "epoch"] = "epoch",
):
if not num_warmup_steps > 0:
msg = f"num_warmup_steps should be > 0, got {num_warmup_steps}"
Expand All @@ -55,7 +56,7 @@ class LinearWarmupSchedulerCallable:
main_scheduler_callable: Callable to create a LR scheduler that will be mainly used.
num_warmup_steps: Learning rate will be linearly increased during a period equal to this number.
If it is less than or equal to zero, do not create `LinearWarmupScheduler`.
warmup_interval: If "epoch", count the number of steps for the warmup period.
warmup_interval: If "epoch", count the number of epochs for the warmup period.
Otherwise, the iteration step will be the warmup period.
monitor: If given, override the main scheduler's `monitor` attribute.
"""
Expand All @@ -64,7 +65,7 @@ def __init__(
self,
main_scheduler_callable: LRSchedulerCallable,
num_warmup_steps: int = 0,
warmup_interval: Literal["step", "epoch"] = "step",
warmup_interval: Literal["step", "epoch"] = "epoch",
monitor: str | None = None,
):
self.main_scheduler_callable = SchedulerCallableSupportHPO.from_callable(main_scheduler_callable)
Expand Down
2 changes: 2 additions & 0 deletions src/otx/recipe/_base_/train.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ callbacks:
init_args:
max_interval: 5
decay: -0.025
min_earlystop_patience: 5
min_lrschedule_patience: 3
logger:
- class_path: lightning.pytorch.loggers.csv_logs.CSVLogger
init_args:
Expand Down
2 changes: 1 addition & 1 deletion src/otx/recipe/anomaly_classification/stfpm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ overrides:
precision: 32
max_epochs: 100
callbacks:
- class_path: lightning.pytorch.callbacks.EarlyStopping
- class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
init_args:
patience: 5
mode: max
Expand Down
2 changes: 1 addition & 1 deletion src/otx/recipe/anomaly_detection/stfpm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ overrides:
precision: 32
max_epochs: 100
callbacks:
- class_path: lightning.pytorch.callbacks.EarlyStopping
- class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
init_args:
patience: 5
mode: max
Expand Down
2 changes: 1 addition & 1 deletion src/otx/recipe/anomaly_segmentation/stfpm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ overrides:
precision: 32
max_epochs: 100
callbacks:
- class_path: lightning.pytorch.callbacks.EarlyStopping
- class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
init_args:
patience: 5
mode: max
Expand Down
18 changes: 9 additions & 9 deletions src/otx/recipe/classification/h_label_cls/deit_tiny.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,16 @@ model:
weight_decay: 0.05

scheduler:
class_path: lightning.pytorch.cli.ReduceLROnPlateau
class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
init_args:
mode: max
factor: 0.5
patience: 1
monitor: val/accuracy
num_warmup_steps: 0
main_scheduler_callable:
class_path: lightning.pytorch.cli.ReduceLROnPlateau
init_args:
mode: max
factor: 0.5
patience: 5
monitor: val/accuracy

engine:
task: H_LABEL_CLS
Expand All @@ -26,10 +30,6 @@ callback_monitor: val/accuracy
data: ../../_base_/data/classification.yaml
overrides:
max_epochs: 90
callbacks:
- class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
init_args:
patience: 3

data:
task: H_LABEL_CLS
Expand Down
18 changes: 9 additions & 9 deletions src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,16 @@ model:
weight_decay: 0.0001

scheduler:
class_path: lightning.pytorch.cli.ReduceLROnPlateau
class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
init_args:
mode: max
factor: 0.5
patience: 1
monitor: val/accuracy
num_warmup_steps: 0
main_scheduler_callable:
class_path: lightning.pytorch.cli.ReduceLROnPlateau
init_args:
mode: max
factor: 0.5
patience: 5
monitor: val/accuracy

engine:
task: H_LABEL_CLS
Expand All @@ -30,10 +34,6 @@ overrides:
- data.train_subset.transforms

max_epochs: 90
callbacks:
- class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
init_args:
patience: 3

data:
task: H_LABEL_CLS
Expand Down
13 changes: 12 additions & 1 deletion src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,18 @@ model:
momentum: 0.9
weight_decay: 0.0001

scheduler:
class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
init_args:
num_warmup_steps: 0
main_scheduler_callable:
class_path: lightning.pytorch.cli.ReduceLROnPlateau
init_args:
mode: max
factor: 0.5
patience: 5
monitor: val/accuracy

engine:
task: H_LABEL_CLS
device: auto
Expand All @@ -25,7 +37,6 @@ overrides:
callbacks:
- class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
init_args:
patience: 3
warmup_iters: 750
data:
task: H_LABEL_CLS
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ model:
init_args:
mode: max
factor: 0.5
patience: 1
patience: 5
monitor: val/accuracy

engine:
Expand All @@ -31,10 +31,6 @@ callback_monitor: val/accuracy
data: ../../_base_/data/classification.yaml
overrides:
max_epochs: 90
callbacks:
- class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
init_args:
patience: 3

data:
task: H_LABEL_CLS
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,14 @@ model:
weight_decay: 0.0001

scheduler:
class_path: torch.optim.lr_scheduler.CosineAnnealingLR
class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
init_args:
T_max: 100000
eta_min: 0
num_warmup_steps: 0
main_scheduler_callable:
class_path: torch.optim.lr_scheduler.CosineAnnealingLR
init_args:
T_max: 100000
eta_min: 0

engine:
task: H_LABEL_CLS
Expand All @@ -29,10 +33,6 @@ overrides:
- data.train_subset.transforms

max_epochs: 90
callbacks:
- class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
init_args:
patience: 3

data:
task: H_LABEL_CLS
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,14 @@ model:
weight_decay: 0.0001

scheduler:
class_path: torch.optim.lr_scheduler.CosineAnnealingLR
class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
init_args:
T_max: 100000
eta_min: 0
num_warmup_steps: 0
main_scheduler_callable:
class_path: torch.optim.lr_scheduler.CosineAnnealingLR
init_args:
T_max: 100000
eta_min: 0

engine:
task: H_LABEL_CLS
Expand All @@ -29,10 +33,6 @@ overrides:
- data.train_subset.transforms

max_epochs: 90
callbacks:
- class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
init_args:
patience: 3

data:
task: H_LABEL_CLS
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,14 @@ model:
weight_decay: 0.0001

scheduler:
class_path: torch.optim.lr_scheduler.CosineAnnealingLR
class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
init_args:
T_max: 100000
eta_min: 0
num_warmup_steps: 0
main_scheduler_callable:
class_path: torch.optim.lr_scheduler.CosineAnnealingLR
init_args:
T_max: 100000
eta_min: 0

engine:
task: H_LABEL_CLS
Expand All @@ -29,10 +33,6 @@ overrides:
- data.train_subset.transforms

max_epochs: 90
callbacks:
- class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
init_args:
patience: 3

data:
task: H_LABEL_CLS
Expand Down
18 changes: 9 additions & 9 deletions src/otx/recipe/classification/multi_class_cls/deit_tiny.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,16 @@ model:
weight_decay: 0.05

scheduler:
class_path: lightning.pytorch.cli.ReduceLROnPlateau
class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
init_args:
mode: max
factor: 0.5
patience: 1
monitor: val/accuracy
num_warmup_steps: 0
main_scheduler_callable:
class_path: lightning.pytorch.cli.ReduceLROnPlateau
init_args:
mode: max
factor: 0.5
patience: 5
monitor: val/accuracy

engine:
task: MULTI_CLASS_CLS
Expand All @@ -28,7 +32,3 @@ callback_monitor: val/accuracy
data: ../../_base_/data/classification.yaml
overrides:
max_epochs: 90
callbacks:
- class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
init_args:
patience: 3
18 changes: 9 additions & 9 deletions src/otx/recipe/classification/multi_class_cls/dino_v2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,16 @@ model:
weight_decay: 0.05

scheduler:
class_path: lightning.pytorch.cli.ReduceLROnPlateau
class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
init_args:
mode: max
factor: 0.5
patience: 1
monitor: val/accuracy
num_warmup_steps: 0
main_scheduler_callable:
class_path: lightning.pytorch.cli.ReduceLROnPlateau
init_args:
mode: max
factor: 0.5
patience: 5
monitor: val/accuracy

engine:
task: MULTI_CLASS_CLS
Expand All @@ -27,7 +31,3 @@ callback_monitor: val/accuracy
data: ../../_base_/data/classification.yaml
overrides:
max_epochs: 90
callbacks:
- class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
init_args:
patience: 3
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,16 @@ model:
weight_decay: 0.0001

scheduler:
class_path: lightning.pytorch.cli.ReduceLROnPlateau
class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
init_args:
mode: max
factor: 0.5
patience: 1
monitor: val/accuracy
num_warmup_steps: 0
main_scheduler_callable:
class_path: lightning.pytorch.cli.ReduceLROnPlateau
init_args:
mode: max
factor: 0.5
patience: 5
monitor: val/accuracy

engine:
task: MULTI_CLASS_CLS
Expand All @@ -31,10 +35,6 @@ overrides:
- data.train_subset.transforms

max_epochs: 90
callbacks:
- class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
init_args:
patience: 3

data:
train_subset:
Expand Down
Loading
Loading