mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-10-14 23:58:11 +08:00
use log1p in orpo loss
See https://github.com/huggingface/trl/pull/1491
Former-commit-id: 3b15d495264b00a4f8716bafea334778874963d7
This commit is contained in:
parent
9abd83adb1
commit
00e17a377c
@ -84,7 +84,7 @@ class CustomORPOTrainer(DPOTrainer):
|
||||
|
||||
# Derived from Eqs. (4) and (7) from https://arxiv.org/abs/2403.07691 by using log identities and exp(log(P(y|x))) = P(y|x)
|
||||
log_odds = (chosen_logps - rejected_logps) - (
|
||||
torch.log(1 - torch.exp(chosen_logps)) - torch.log(1 - torch.exp(rejected_logps))
|
||||
torch.log1p(-torch.exp(chosen_logps)) - torch.log1p(-torch.exp(rejected_logps))
|
||||
)
|
||||
ratio = F.logsigmoid(log_odds)
|
||||
losses = self.beta * ratio
|
||||
|
Loading…
x
Reference in New Issue
Block a user