mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-12-16 11:50:35 +08:00
upcast logits (cast chosen/rejected scores with .float() before the pairwise logsigmoid loss)
This commit is contained in:
@@ -99,7 +99,7 @@ class PairwiseTrainer(Trainer):
|
||||
chosen_scores = chosen_rewards.gather(dim=-1, index=(chosen_masks.sum(dim=-1, keepdim=True) - 1))
|
||||
rejected_scores = rejected_rewards.gather(dim=-1, index=(rejected_masks.sum(dim=-1, keepdim=True) - 1))
|
||||
chosen_scores, rejected_scores = chosen_scores.squeeze(), rejected_scores.squeeze()
|
||||
loss = -torch.nn.functional.logsigmoid(chosen_scores - rejected_scores).mean()
|
||||
loss = -torch.nn.functional.logsigmoid(chosen_scores.float() - rejected_scores.float()).mean()
|
||||
if return_outputs:
|
||||
return loss, (loss, chosen_scores, rejected_scores)
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user