diff --git a/src/llmtuner/hparams/finetuning_args.py b/src/llmtuner/hparams/finetuning_args.py index cfdc8b24..d39812c7 100644 --- a/src/llmtuner/hparams/finetuning_args.py +++ b/src/llmtuner/hparams/finetuning_args.py @@ -74,6 +74,10 @@ class RLHFArguments: default=None, metadata={"help": "Log with either 'wandb' or 'tensorboard' in PPO training."} ) + ppo_epochs: Optional[int] = field( + default=4, + metadata={"help": "Number of optimization epochs per batch of samples in PPO training."}, + ) ppo_score_norm: Optional[bool] = field( default=False, metadata={"help": "Use score normalization in PPO training."}