Use torch.log1p in the ORPO loss (log1p(-x) is more accurate than log(1 - x) when x is small)

https://github.com/huggingface/trl/pull/1491 Former-commit-id: 3b15d495264b00a4f8716bafea334778874963d7
2026-02-28 00:36:02 +08:00 · 2024-03-31 19:27:08 +08:00
parent 9abd83adb1
commit 00e17a377c
1 changed file with 1 addition and 1 deletion
--- a/src/llmtuner/train/orpo/trainer.py
+++ b/src/llmtuner/train/orpo/trainer.py
@@ -84,7 +84,7 @@ class CustomORPOTrainer(DPOTrainer):

        # Derived from Eqs. (4) and (7) from https://arxiv.org/abs/2403.07691 by using log identities and exp(log(P(y|x)) = P(y|x)
        log_odds = (chosen_logps - rejected_logps) - (
-            torch.log(1 - torch.exp(chosen_logps)) - torch.log(1 - torch.exp(rejected_logps))
+            torch.log1p(-torch.exp(chosen_logps)) - torch.log1p(-torch.exp(rejected_logps))
        )
        ratio = F.logsigmoid(log_odds)
        losses = self.beta * ratio