huggingface · Knight7561 · Dec 18, 2024
diff --git a/2_preference_alignment/notebooks/dpo_finetuning_example.ipynb b/2_preference_alignment/notebooks/dpo_finetuning_example.ipynb
@@ -276,6 +276,13 @@
     "    use_mps_device=device == \"mps\",\n",
     "    # Model ID for HuggingFace Hub uploads\n",
     "    hub_model_id=finetune_name,\n",
+    "    # DPO-specific beta parameter that controls how far the trained policy may deviate from the reference model\n",
+    "    # Higher values keep the model closer to the reference model; lower values (like 0.1) allow more deviation\n",
+    "    beta=0.1,\n",
+    "    # Maximum length of the input prompt in tokens\n",
+    "    max_prompt_length=1024,\n",
+    "    # Maximum combined length of prompt + response in tokens\n",
+    "    max_length=1536\n",
     ")"
    ]
   },
@@ -294,13 +301,6 @@
     "    train_dataset=dataset,\n",
     "    # Tokenizer for processing inputs\n",
     "    processing_class=tokenizer,\n",
-    "    # DPO-specific temperature parameter that controls the strength of the preference model\n",
-    "    # Lower values (like 0.1) make the model more conservative in following preferences\n",
-    "    beta=0.1,\n",
-    "    # Maximum length of the input prompt in tokens\n",
-    "    max_prompt_length=1024,\n",
-    "    # Maximum combined length of prompt + response in tokens\n",
-    "    max_length=1536,\n",
     ")"
    ]
   },