
Commit

wip
Signed-off-by: arendu <[email protected]>
arendu committed Oct 30, 2024
1 parent bd590d6 commit 3ed1cb1
Showing 4 changed files with 428 additions and 4 deletions.
1 change: 1 addition & 0 deletions examples/nlp/gpt/conf/gpt_dpo.yaml
@@ -57,6 +57,7 @@ model:
   micro_batch_size: 1
   global_batch_size: 64
   megatron_amp_O2: True
+  mamba_hybrid: False
 
   dpo:
     # This default value ensures there are no numeric differences between trained and reference policies when computing log probs.
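
The flag defaults to False, so existing GPT DPO configs keep their current behavior; a run against a hybrid Mamba checkpoint would opt in explicitly. A minimal sketch of the relevant model section with the flag enabled (only the lines shown in this hunk; the rest of gpt_dpo.yaml is assumed unchanged):

    model:
      micro_batch_size: 1
      global_batch_size: 64
      megatron_amp_O2: True
      mamba_hybrid: True  # select MegatronMambaDPOModel instead of MegatronGPTDPOModel

Since the training script is driven by a Hydra-style config, the same setting could presumably also be passed as a command-line override such as model.mamba_hybrid=True instead of editing the YAML.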
4 changes: 2 additions & 2 deletions examples/nlp/gpt/train_gpt_dpo.py
@@ -21,7 +21,7 @@
 from nemo.utils.exp_manager import exp_manager
 from nemo_aligner.algorithms.dpo import DPOTrainer, dpo_custom_collate
 from nemo_aligner.data.nlp.builders import build_dataloader, build_train_valid_test_dpo_datasets
-from nemo_aligner.models.nlp.gpt.megatron_gpt_dpo_model import MegatronGPTDPOModel
+from nemo_aligner.models.nlp.gpt.megatron_gpt_dpo_model import MegatronGPTDPOModel, MegatronMambaDPOModel
 from nemo_aligner.utils.distributed import Timer
 from nemo_aligner.utils.train_script_utils import (
     CustomLoggerWrapper,
@@ -53,7 +53,7 @@ def main(cfg) -> None:
     logger = CustomLoggerWrapper(trainer.loggers)
 
     ptl_model = load_from_nemo(
-        MegatronGPTDPOModel,
+        MegatronMambaDPOModel if cfg.model.mamba_hybrid else MegatronGPTDPOModel,
         cfg.model,
         trainer,
         strict=True,
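
Taken together, the two hunks make the model class a function of the new flag while leaving the rest of main() untouched. A condensed sketch of that selection logic follows; the helper name select_dpo_model_class is a hypothetical wrapper added here purely for illustration, and cfg is the Hydra/OmegaConf config the script already receives:

    def select_dpo_model_class(cfg):
        # Hypothetical helper mirroring the inline expression in train_gpt_dpo.py:
        # use the Mamba-hybrid DPO model when model.mamba_hybrid is set, else the GPT one.
        from nemo_aligner.models.nlp.gpt.megatron_gpt_dpo_model import (
            MegatronGPTDPOModel,
            MegatronMambaDPOModel,
        )

        return MegatronMambaDPOModel if cfg.model.mamba_hybrid else MegatronGPTDPOModel

The selected class is then handed to load_from_nemo exactly as before; the remaining arguments of that call are not shown in this diff.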
