fix RM accuracy

2026-06-25 08:38:55 +08:00 · 2023-06-28 01:40:13 +08:00
parent 9cb1af71f3
commit 7826a8ca77
1 changed files with 6 additions and 3 deletions
--- a/src/utils/pairwise.py
+++ b/src/utils/pairwise.py
@@ -13,8 +13,7 @@ logger = get_logger(__name__)
 def compute_accuracy(eval_preds: Sequence[Union[np.ndarray, Tuple[np.ndarray]]]) -> Dict[str, float]:
    preds, _ = eval_preds
-    preds = np.array(preds)
+    return {"accuracy": (preds[0] > preds[1]).sum() / len(preds[0])}
-    return {"accuracy": (preds[:, 0] > preds[:, 1]).sum() / len(preds)}
 class PairwiseDataCollatorWithPadding(DynamicDataCollatorWithPadding):
@@ -49,9 +48,13 @@ class PairwisePeftTrainer(PeftTrainer):
        We use score on the EOS token to represent reward of the whole sentence.
        Subclass and override to inject custom behavior. It should not be directly used by external scripts.
        Note that the first element will be removed from the output tuple.
        See: https://github.com/huggingface/transformers/blob/v4.30.2/src/transformers/trainer.py#L3509
        """
        batch_size = inputs["input_ids"].size(0) // 2
        _, _, values = model(**inputs)
        r_accept, r_reject = values[:, -1].split(batch_size, dim=0)
        loss = -torch.log(torch.sigmoid(r_accept - r_reject)).mean()
-        return (loss, torch.stack((r_accept, r_reject), dim=-1)) if return_outputs else loss
+        return (loss, [loss, r_accept, r_reject]) if return_outputs else loss