chore: sync main to dev (#461)
Signed-off-by: Terry Kong <[email protected]>
Signed-off-by: ashors1 <[email protected]>
Signed-off-by: Oliver Koenig <[email protected]>
Signed-off-by: oliver könig <[email protected]>
Co-authored-by: Anna Shors <[email protected]>
Co-authored-by: oliver könig <[email protected]>
3 people authored Dec 20, 2024
1 parent f817eb1 commit b025619
Showing 6 changed files with 114 additions and 25 deletions.
66 changes: 61 additions & 5 deletions .github/workflows/cicd-main.yml
@@ -41,6 +41,7 @@ jobs:
outputs:
test_to_run: ${{ steps.test_to_run.outputs.main }}
all: ${{ steps.all.outputs.main }}
run_ci: ${{ steps.evaluate.outputs.run_ci }}
steps:
- name: Parse test_to_run
id: test_to_run
@@ -51,23 +52,57 @@ jobs:
id: all
run: |
echo "main=${{ contains(fromJSON(steps.test_to_run.outputs.main), 'all') }}" | tee -a "$GITHUB_OUTPUT"
- name: Get changed files
id: changed-files
if: github.event_name == 'pull_request'
uses: tj-actions/changed-files@v44
with:
files_yaml: |
doc:
- '**.md'
- docs/**
src:
- '!**.md'
- '!docs/**'
- name: Evaluate conditions
id: evaluate
env:
DOCS_ONLY: ${{ steps.changed-files.outputs.doc_any_changed == 'true' && steps.changed-files.outputs.src_any_changed == 'false' }}
CHANGED_DOCS: ${{ steps.changed-files.outputs.doc_all_changed_files }}
CHANGED_SRC: ${{ steps.changed-files.outputs.src_all_changed_files }}
IS_PULLREQUEST: ${{ github.event_name == 'pull_request' }}
LABEL: ${{ github.event.label.name == 'Run CICD' }}
run: |
# Some output that's helpful for debugging
echo "Docs changed: $CHANGED_DOCS"
echo "Src changed: $CHANGED_SRC"
echo "DOCS_ONLY: $DOCS_ONLY"
echo "LABEL: $LABEL"
echo "IS_PULLREQUEST: $IS_PULLREQUEST"
# Run CI only (on main or if label is attached) and if it's not only docs
echo run_ci=$([[ ("$LABEL" = "true" || "$IS_PULLREQUEST" = "false") && "$DOCS_ONLY" = "false" ]] && echo "true" || echo "false") | tee -a "$GITHUB_OUTPUT"
build-container:
if: ${{ github.event.label.name == 'Run CICD' || github.ref == 'refs/heads/main' }}
if: ${{ needs.pre-flight.outputs.run_ci == 'true' }}
needs: [pre-flight]
uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/[email protected]
with:
image-name: nemo_aligner_container
dockerfile: Dockerfile
image-label: nemo-aligner
build-args: |
MAX_JOBS=32
ALIGNER_COMMIT=${{ github.event.pull_request.head.sha || github.sha }}
ALIGNER_COMMIT=${{ github.sha }}
Unit_Tests:
name: ${{ matrix.test_case }}
needs: [build-container, pre-flight]
uses: ./.github/workflows/_run_test.yml
if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'unit') || needs.pre-flight.outputs.all == 'true'
if: ${{ needs.pre-flight.outputs.run_ci == 'true' }}
strategy:
matrix:
test_case:
@@ -85,7 +120,7 @@ jobs:
name: ${{ matrix.test_case }}
needs: [build-container, pre-flight]
uses: ./.github/workflows/_run_test.yml
if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'functional') || needs.pre-flight.outputs.all == 'true'
if: ${{ needs.pre-flight.outputs.run_ci == 'true' }}
strategy:
matrix:
test_case:
@@ -104,3 +139,24 @@ jobs:
TIMEOUT: 10
SCRIPT: |
bash /opt/NeMo-Aligner/tests/functional/test_cases/${{ matrix.test_case }}
CI_QA_Gate:
name: CI quality check
if: always()
runs-on: ubuntu-latest
needs:
- Unit_Tests
- Functional_Tests
steps:
- name: main
env:
JOB_RESULTS: ${{ toJSON(needs) }}
ALL_SUCCESS: ${{ !contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') && !contains(needs.*.result, 'skipped') }}
CI_SKIP: ${{ github.event.label.name == 'Skip CICD' }}
run: |
SUMMARY=$(echo $JOB_RESULTS | jq 'to_entries[] | .key + ": " + .value.result' | tr -d '"')
echo '🤖: CICD Result' >> $GITHUB_STEP_SUMMARY
echo "$SUMMARY" >> $GITHUB_STEP_SUMMARY
test "$ALL_SUCCESS" = "true" || test "$CI_SKIP" = "true"
17 changes: 11 additions & 6 deletions .github/workflows/release-freeze.yml
@@ -3,20 +3,25 @@ name: "Code freeze"
on:
workflow_dispatch:
inputs:
type_of_release:
release-type:
type: choice
description: Type of release
options:
- major
- minor

freeze-commit:
type: string
description: Commit SHA to use for cut-off
required: false
default: main
jobs:
code-freeze:
uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_code_freeze.yml@v0.8.0
uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_code_freeze.yml@v0.17.3
with:
name_of_library: NeMo-Aligner
type_of_release: ${{ inputs.type_of_release }}
python_package: nemo_aligner
library-name: NeMo-Aligner
python-package: nemo_aligner
release-type: ${{ inputs.release-type }}
freeze-commit: ${{ inputs.freeze-commit }}
secrets:
SLACK_RELEASE_ENDPOINT: ${{ secrets.SLACK_RELEASE_ENDPOINT }}
SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }}
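With the renamed inputs, a code freeze can also be dispatched from the GitHub CLI; a hypothetical invocation from a checkout of the repository:

    # release-type must be 'major' or 'minor'; freeze-commit falls back to 'main' when omitted.
    gh workflow run "Code freeze" -f release-type=minor -f freeze-commit=main
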
6 changes: 3 additions & 3 deletions .github/workflows/release.yaml
@@ -25,10 +25,10 @@ on:
required: true
default: true
type: boolean

jobs:
release:
uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_release_library.yml@v0.15.0
uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_release_library.yml@v0.17.3
with:
release-ref: ${{ inputs.release-ref }}
image-name: nemo_aligner_container
@@ -46,5 +46,5 @@ jobs:
TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }}
TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
SLACK_RELEASE_ENDPOINT: ${{ secrets.SLACK_RELEASE_ENDPOINT }}
PAT: ${{ secrets.PAT }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
PAT: ${{ secrets.PAT }}
8 changes: 6 additions & 2 deletions docs/user-guide/dpo.rst
@@ -46,10 +46,14 @@ To start, we must first get a pretrained model to align. There are two models we
--in-folder ./model_checkpoint \
--out-file ./mcore_gpt.nemo
.. tab-item:: LLaMa3 7B
.. tab-item:: LLaMa3 8B
:sync: key2

#. Download the `Llama 3 8B LLM model and tokenizer <https://huggingface.co/meta-llama/Meta-Llama-3-8B>`__ into the models folder.
#. Download the `Llama 3 8B LLM model and tokenizer <https://huggingface.co/meta-llama/Meta-Llama-3-8B>`__ into the models folder. You can use the Hugging Face CLI for this:
.. code-block:: bash
huggingface-cli download meta-llama/Meta-Llama-3-8B --local-dir /path/to/llama
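The Meta-Llama-3 weights are gated, so if the download fails with an authorization error, you likely need to accept the license on the model page and authenticate the CLI first:
.. code-block:: bash
huggingface-cli login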
#. Convert the LLaMa3 LLM into ``.nemo`` format.
.. code-block:: bash
18 changes: 9 additions & 9 deletions docs/user-guide/reinforce.rst
@@ -3,14 +3,14 @@
.. _model-aligner-reinforce:

Model Alignment by REINFORCE
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@

In this tutorial, we will guide you through the process of aligning a NeMo Framework model using REINFORCE. This method can be applied to various models, including LLaMa2 and Mistral; our scripts function consistently across these models.

REINFORCE is usually preceded by Supervised Fine-Tuning (SFT). You should first follow the :ref:`Prerequisite guide <prerequisite>` and the :ref:`SFT guide <sft>`. After obtaining the SFT model, you will also need to train a reward model as in the :ref:`PPO guide <ppo>`. We will use the REINFORCE algorithm on the `Anthropic-HH-RLHF <https://huggingface.co/datasets/Anthropic/hh-rlhf>`__ dataset.

REINFORCE Training
############
##################

After you have fine-tuned a GPT model using Supervised Fine-Tuning (SFT), and trained a reward model as explained in the preceding section, you can start aligning the policy using REINFORCE.

@@ -48,7 +48,7 @@ To launch the server:
The above example launches the reward model server on eight GPUs and one node. Make sure to change ``trainer.devices`` and ``trainer.num_nodes`` depending on your model size and scale. NeMo-Aligner will work at any scale. Also, make sure to tune the ``trainer.reinforce.inference_micro_batch_size`` argument. This argument sets the size of the batch the REINFORCE actor is allowed to send to the reward model per DP rank.

Launch the Initial Policy and REINFORCE Actor Training
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

The REINFORCE Actor training job contains the master controller that makes the HTTP calls to all servers when needed. To launch the REINFORCE Actor and Initial Policy server:

@@ -58,7 +58,7 @@ The REINFORCE Actor training job contains the master controller that makes the H
TRAIN_DATA_PATH="/path/to/train_prompts.jsonl"
VALID_DATA_PATH="/path/to/test_prompts.jsonl"
PRETRAINED_ACTOR_NEMO_FILE="/path/to/sft_checkpoint.nemo"
ACTOR_NEMO_FILE="/path/to/sft_checkpoint.nemo"
RESULTS_DIR="/path/to/actor_results_dir"
USE_FLASK=False
@@ -73,7 +73,7 @@ The REINFORCE Actor training job contains the master controller that makes the H
cd ${GPFS}
export PYTHONPATH="${GPFS}:${PYTHONPATH}" \
&& export HYDRA_FULL_ERROR=1 \
&& python -u examples/nlp/gpt/train_gpt_reinforce_actor.py \
&& mpirun -n 8 --allow-run-as-root python -u examples/nlp/gpt/train_gpt_reinforce_actor.py \
"model.data.data_prefix={train: [${TRAIN_DATA_PATH}], validation: [${VALID_DATA_PATH}], test: [${VALID_DATA_PATH}]}" \
pretrained_checkpoint.restore_from_path=\"${ACTOR_NEMO_FILE}\" \
exp_manager.checkpoint_callback_params.save_top_k=1 \
@@ -114,7 +114,7 @@ The REINFORCE Actor training job contains the master controller that makes the H
The above command launches the initial policy and actor servers on one node with eight GPUs.

Launching Both Servers for REINFORCE training
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

You can use Slurm to launch the two jobs and have them coordinate together in a full REINFORCE run as follows:

@@ -239,7 +239,7 @@ You can use slurm to launch the two jobs and get them to coordinate together in
trainer.reinforce.rollout_batch_seq_length=4096
EOF
srun --het-group=1 -o $PPO_OUTFILE -e $PPO_ERRFILE --container-image=${CONTAINER} $MOUNTS bash -c "${cmd_reinforce}" &
srun --mpi=pmix --het-group=1 -o $PPO_OUTFILE -e $PPO_ERRFILE --container-image=${CONTAINER} $MOUNTS bash -c "${cmd_reinforce}" &
wait
@@ -251,6 +251,6 @@ It is important to launch all jobs with ``&`` after the srun command to ensure t
Make sure to change the reward model arg ``trainer.reinforce.inference_micro_batch_size`` such that ``trainer.reinforce.inference_micro_batch_size * DP size <= model.reinforce.rollout_micro_batch_size``.
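For example, with a hypothetical data-parallel size of 4 and ``model.reinforce.rollout_micro_batch_size=16``, ``trainer.reinforce.inference_micro_batch_size`` can be at most 4, since ``4 * 4 = 16``.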
REINFORCE Results
%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%
After you've completed reinforce training, you can serve your model using the `megatron_gpt_eval.py <https://github.com/NVIDIA/NeMo/blob/8cd5f1c8e7d4fed9f4f946028cd02047c5d2296f/examples/nlp/language_modeling/megatron_gpt_eval.py#L4>`__ script from the NeMo codebase to run more rigorous evaluation of your trained model.
After you've completed reinforce training, you can serve your model using the `megatron_gpt_eval.py <https://github.com/NVIDIA/NeMo/blob/8cd5f1c8e7d4fed9f4f946028cd02047c5d2296f/examples/nlp/language_modeling/megatron_gpt_eval.py#L4>`__ script from the NeMo codebase to run more rigorous evaluation of your trained model.
24 changes: 24 additions & 0 deletions docs/user-guide/rlhf.rst
@@ -383,6 +383,30 @@ NeMo-Aligner has support for accelerating RLHF with `TensorRT-LLM <https://githu
For more information please see the NeMo-Aligner `paper <https://arxiv.org/abs/2405.01481>`__.
.. note::
If you are running ``train_gpt_ppo_actor.py`` interactively (outside of SLURM) with TensorRT-LLM acceleration,
you must prepend ``mpirun -n 8 --allow-run-as-root`` to the python run command:
.. code-block:: bash
mpirun -n 8 --allow-run-as-root python -u ${GPFS}/examples/nlp/gpt/train_gpt_ppo_actor.py ...
If you are using SLURM, you do not need to prepend ``mpirun`` since this will be handled automatically
if you run ``srun`` with ``--mpi=pmix``:
.. code-block:: bash
read -r -d '' cmd_ppo <<EOF
cd ${GPFS} \
&& export PYTHONPATH="${GPFS}:${PYTHONPATH}" \
&& export HYDRA_FULL_ERROR=1 \
&& python -u ${GPFS}/examples/nlp/gpt/train_gpt_ppo_actor.py \
...
EOF
srun --mpi=pmix ... bash -c "${cmd_ppo}"
PPO Results with TensorRT-LLM
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
