feat: update dockerfile to allow incremental builds and change TRTLLM error to point to dockerfile (#290)

Signed-off-by: Terry Kong <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
terrykong and pre-commit-ci[bot] authored Sep 24, 2024
1 parent cd088f4 commit 3c35440
Showing 3 changed files with 37 additions and 18 deletions.
47 changes: 31 additions & 16 deletions Dockerfile
@@ -1,3 +1,12 @@
# To build NeMo-Aligner from a base PyTorch container:
#
# docker buildx build -t aligner:latest .
#
# To update NeMo-Aligner from a pre-built NeMo-Framework container:
#
# docker buildx build --target=aligner-bump --build-arg=BASE_IMAGE=nvcr.io/nvidia/nemo:24.07 -t aligner:latest .
#

# Number of parallel threads for compute heavy build jobs
# if you get errors building TE or Apex, decrease this to 4
ARG MAX_JOBS=8
@@ -12,16 +21,33 @@ ARG PROTOBUF_VERSION=4.24.4

ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:24.03-py3

FROM ${BASE_IMAGE}

ARG MAX_JOBS
FROM ${BASE_IMAGE} AS aligner-bump

# needed in case git complains that it can't detect a valid email, this email is fake but works
RUN git config --global user.email "[email protected]"
ARG ALIGNER_COMMIT

WORKDIR /opt

# NeMo Aligner
RUN <<"EOF" bash -exu
if [[ ! -d NeMo-Aligner ]]; then
git clone https://github.com/NVIDIA/NeMo-Aligner.git
cd NeMo-Aligner
git checkout $ALIGNER_COMMIT
pip install --no-deps -e .
cd -
fi
cd NeMo-Aligner
git fetch -a
git checkout -f ${ALIGNER_COMMIT}
git pull
EOF

FROM aligner-bump as final

# needed in case git complains that it can't detect a valid email, this email is fake but works
RUN git config --global user.email "[email protected]"
# install TransformerEngine
ARG MAX_JOBS
ARG TE_TAG
RUN pip uninstall -y transformer-engine && \
git clone https://github.com/NVIDIA/TransformerEngine.git && \
@@ -76,17 +102,6 @@ RUN pip uninstall -y megatron-core && \
fi && \
pip install -e .

# NeMo Aligner
ARG ALIGNER_COMMIT
RUN git clone https://github.com/NVIDIA/NeMo-Aligner.git && \
cd NeMo-Aligner && \
git pull && \
if [ ! -z $ALIGNER_COMMIT ]; then \
git fetch origin $ALIGNER_COMMIT && \
git checkout FETCH_HEAD; \
fi && \
pip install --no-deps -e .

# Git LFS
RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash && \
apt-get install git-lfs && \
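The new `aligner-bump` stage replaces the old unconditional `git clone` with a clone-only-if-absent step followed by a fetch and forced checkout. That is what makes rebuilds incremental: on a cached rebuild the clone layer is reused and only the fetch/checkout re-runs. A minimal sketch of the same pattern in Python (the `sync_checkout` helper and its names are illustrative, not part of the repo):

```python
import subprocess
from pathlib import Path


def sync_checkout(url: str, dest: Path, commit: str) -> None:
    """Clone `url` into `dest` only if absent, then force-checkout `commit`.

    Running this repeatedly is safe: the expensive clone happens once,
    and later runs only fetch and move HEAD, mirroring the Dockerfile's
    heredoc logic for incremental builds.
    """
    if not dest.is_dir():
        subprocess.run(["git", "clone", url, str(dest)], check=True)
    subprocess.run(["git", "fetch", "--all"], cwd=dest, check=True)
    subprocess.run(["git", "checkout", "-f", commit], cwd=dest, check=True)
```

The forced checkout (`-f`) matters here: it discards any local edits in the cached layer so the build always lands exactly on the requested commit.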
3 changes: 1 addition & 2 deletions nemo_aligner/models/nlp/gpt/megatron_gpt_ppo_actor.py
@@ -50,7 +50,7 @@
set_sync_funcs,
set_train,
)
from nemo_aligner.utils.trt_llm import HAVE_TRTLLM, GPTGenerateTRTLLM
from nemo_aligner.utils.trt_llm import GPTGenerateTRTLLM
from nemo_aligner.utils.utils import (
adapter_control,
clear_memory,
@@ -81,7 +81,6 @@ def __init__(self, cfg: DictConfig, trainer: Trainer):

self.use_trtllm_generation = "trt_llm" in self.cfg.ppo and self.cfg.ppo.trt_llm.enable
if self.use_trtllm_generation:
assert HAVE_TRTLLM, "TRTLLM generation was enabled but TRTLLM was not able to be imported"
self.trtllm_generate = GPTGenerateTRTLLM(
model_cfg=self.cfg,
max_generation_length=self.cfg.ppo.length_params.get("max_length", 1024),
5 changes: 5 additions & 0 deletions nemo_aligner/utils/trt_llm.py
@@ -53,6 +53,11 @@ def __init__(
reshard_model=False,
trt_model_dir="/tmp/trt_llm_model",
):
if not HAVE_TRTLLM:
raise RuntimeError(
"You are trying to use NeMo-Aligner's TensorRT-LLM acceleration for LLM generation. Please build the dockerfile to enable this feature: https://github.com/NVIDIA/NeMo-Aligner/blob/main/Dockerfile"
)

# If this assert turns out to be a blocker with some tokenizers, potential workarounds could be to:
# - add a config option to allow specifying which token we pass as `end_id` to TRT-LLM (should
# be a token that the model is guaranteed to never generate)
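The constructor check added here takes over from the `assert HAVE_TRTLLM` removed from the actor: the availability flag is computed once at import time, and a `RuntimeError` with an actionable message is raised only when TRT-LLM generation is actually requested. A sketch of that guarded-import pattern (the class name and message are illustrative; `tensorrt_llm` is the upstream package the flag probes for):

```python
# Guarded optional import: probe for the package once, at module import time.
try:
    import tensorrt_llm  # noqa: F401  -- may legitimately be absent
    HAVE_TRTLLM = True
except ImportError:
    HAVE_TRTLLM = False


class GenerateTRTLLMSketch:
    """Illustrative stand-in for a class that requires TensorRT-LLM."""

    def __init__(self):
        # Fail loudly at construction, pointing the user at the fix,
        # instead of asserting at each call site.
        if not HAVE_TRTLLM:
            raise RuntimeError(
                "TensorRT-LLM was requested but could not be imported; "
                "build the project's Dockerfile to enable this feature."
            )
```

Compared with the removed assert, this keeps the check next to the feature that needs it, so every future caller of the class gets the same clear error without repeating the guard.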
