Add RLHF-V dataset

2025-12-16 20:00:36 +08:00 · 2024-09-01 22:57:41 +08:00
parent 55027282cd
commit 8e49940746
13 changed files with 118 additions and 33 deletions
--- a/src/llamafactory/train/dpo/trainer.py
+++ b/src/llamafactory/train/dpo/trainer.py
@@ -176,7 +176,6 @@ class CustomDPOTrainer(DPOTrainer):
            batch = {k: v.detach().clone() for k, v in batch.items()}  # avoid error

        all_logits: "torch.Tensor" = model(**batch, return_dict=True, use_cache=False).logits.to(torch.float32)
-
        all_logps, valid_length = get_batch_logps(logits=all_logits, labels=batch["labels"])
        if self.loss_type in ["ipo", "orpo", "simpo"]:
            all_logps = all_logps / valid_length