From 3791aad8862c1057b50516270c75b3fb0cac283a Mon Sep 17 00:00:00 2001
From: Anna Shors
Date: Mon, 9 Dec 2024 12:37:51 -0800
Subject: [PATCH] fix: fix DPO sequence packing + pipeline parallel (#437)

Signed-off-by: ashors1
---
 nemo_aligner/models/nlp/gpt/megatron_gpt_dpo_model.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/nemo_aligner/models/nlp/gpt/megatron_gpt_dpo_model.py b/nemo_aligner/models/nlp/gpt/megatron_gpt_dpo_model.py
index d471ae367..8888456ce 100644
--- a/nemo_aligner/models/nlp/gpt/megatron_gpt_dpo_model.py
+++ b/nemo_aligner/models/nlp/gpt/megatron_gpt_dpo_model.py
@@ -111,6 +111,8 @@ def fwd_output_and_loss_func(dataloader_iter, model, checkpoint_activations_all_
                 required_keys.add("attention_mask")
                 if "cu_seqlens" in batch:
                     required_keys.add("cu_seqlens")
+                    required_keys.add("max_seqlen")
+                    required_keys.add("cu_seqlens_argmin")
 
                 if parallel_state.is_pipeline_first_stage():
                     if packed: