From ec910a87c0f4dd3078aef61c28fa3970ba81241f Mon Sep 17 00:00:00 2001
From: Yuchen Han <42163912+hannlp@users.noreply.github.com>
Date: Fri, 17 Nov 2023 00:15:51 -0800
Subject: [PATCH] Update finetuning_args.py

Former-commit-id: b24635d22b3084ad29217ef55c1dd1fa4f85a1fb
---
 src/llmtuner/hparams/finetuning_args.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/llmtuner/hparams/finetuning_args.py b/src/llmtuner/hparams/finetuning_args.py
index cfdc8b24..d39812c7 100644
--- a/src/llmtuner/hparams/finetuning_args.py
+++ b/src/llmtuner/hparams/finetuning_args.py
@@ -74,6 +74,10 @@ class RLHFArguments:
         default=None,
         metadata={"help": "Log with either 'wandb' or 'tensorboard' in PPO training."}
     )
+    ppo_epochs: Optional[int] = field(
+        default=4,
+        metadata={"help": "Number of optimisation epochs per batch of samples"},
+    )
     ppo_score_norm: Optional[bool] = field(
         default=False,
         metadata={"help": "Use score normalization in PPO training."}
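
Note (not part of the patch): a minimal sketch of how the new ppo_epochs field could be forwarded to TRL's PPOConfig inside the PPO training workflow. The helper name build_ppo_config and the model_name/learning_rate/finetuning_args parameters are illustrative assumptions; only ppo_epochs and ppo_score_norm come from the patched RLHFArguments, and the PPOConfig keyword names assume a TRL version that exposes them.

# Illustrative sketch only -- not part of this patch.
from trl import PPOConfig

def build_ppo_config(model_name: str, learning_rate: float, finetuning_args) -> PPOConfig:
    """Assumed helper: map RLHFArguments fields onto TRL's PPOConfig."""
    return PPOConfig(
        model_name=model_name,
        learning_rate=learning_rate,
        # New hyperparameter added by this patch: how many optimisation passes
        # PPO makes over each batch of sampled rollouts (defaults to 4).
        ppo_epochs=finetuning_args.ppo_epochs,
        # Existing field visible in the hunk context.
        use_score_norm=finetuning_args.ppo_score_norm,
    )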