support dpo-ftx

Former-commit-id: b87c74289d
2026-06-19 21:58:55 +08:00 · 2023-12-16 19:21:41 +08:00
parent f0f9d253d8
commit 4e75ca1222
6 changed files with 103 additions and 25 deletions
--- a/src/llmtuner/hparams/finetuning_args.py
+++ b/src/llmtuner/hparams/finetuning_args.py
@@ -70,6 +70,14 @@ class RLHFArguments:
        default=0.1,
        metadata={"help": "The beta parameter for the DPO loss."}
    )
+    dpo_loss: Optional[Literal["sigmoid", "hinge"]] = field(  # DPO loss variant; the annotation restricts it to "sigmoid" or "hinge"
+        default="sigmoid",  # used when the option is not supplied
+        metadata={"help": "The type of DPO loss to use."}
+    )
+    dpo_ftx: Optional[float] = field(  # coefficient of the supervised fine-tuning loss in DPO training (presumably 0 disables it; confirm in the trainer)
+        default=0.0,  # float literal so the default's runtime type matches the annotation
+        metadata={"help": "The supervised fine-tuning loss coefficient in DPO training."}
+    )
    ppo_buffer_size: Optional[int] = field(
        default=1,
        metadata={"help": "The number of mini-batches to make experience buffer in a PPO optimization step."}