Mirror of https://github.com/hiyouga/LLaMA-Factory.git, synced 2025-11-05 18:32:14 +08:00
tiny fix
Former-commit-id: 08f7e0862b9df353a0e4d8274617c1a5e6fa6619
parent a4384e442c
commit 684afbf32c
@@ -157,8 +157,8 @@ class PPOTrainerForLLaMA(PPOTrainer, PeftTrainer):
 
                 stats = self.step(queries, responses, rewards)
 
-                loss_meter.update(stats["ppo/loss/total"])
-                reward_meter.update(torch.tensor(rewards).sum().item(), n=len(rewards))
+                loss_meter.update(stats["ppo/loss/total"], n=len(rewards))
+                reward_meter.update(torch.stack(rewards).mean().item(), n=len(rewards))
 
                 if steps_trained == len_dataloader:
                     dataiter = iter(self.dataloader)
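For context, a minimal sketch of what this change does, assuming loss_meter and reward_meter follow the common AverageMeter pattern (a running sum and count with update(val, n)); the AverageMeter class and the sample rewards below are illustrative, not taken from the repository. Passing n=len(rewards) weights each batch by its size, and torch.stack(rewards).mean() logs the per-sample mean reward instead of the batch sum, so batches of different sizes are averaged fairly.

import torch

class AverageMeter:
    """Running average; n is the number of samples behind val (illustrative class, not the project's)."""
    def __init__(self) -> None:
        self.sum, self.count = 0.0, 0

    def update(self, val: float, n: int = 1) -> None:
        self.sum += val * n   # weight the batch-level value by its sample count
        self.count += n

    @property
    def avg(self) -> float:
        return self.sum / max(self.count, 1)

# Hypothetical rewards for one PPO batch: a list of scalar tensors, as in the diff.
rewards = [torch.tensor(0.5), torch.tensor(1.5), torch.tensor(1.0)]

reward_meter = AverageMeter()
# New behavior: log the per-sample mean reward, weighted by batch size.
reward_meter.update(torch.stack(rewards).mean().item(), n=len(rewards))
print(reward_meter.avg)  # 1.0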