Fix PPO arguments: add `ppo_logger` and `ppo_target` fields

This commit is contained in:
hiyouga
2023-10-11 23:40:50 +08:00
parent 2818af0b09
commit 11bd271364
4 changed files with 18 additions and 9 deletions

View File

@@ -57,7 +57,15 @@ class FinetuningArguments:
)
ppo_score_norm: Optional[bool] = field(
default=False,
metadata={"help": "Use score normalization in PPO Training."}
metadata={"help": "Use score normalization in PPO training."}
)
ppo_logger: Optional[str] = field(
default=None,
metadata={"help": "Log with either 'wandb' or 'tensorboard' in PPO training."}
)
ppo_target: Optional[float] = field(
default=6.0,
metadata={"help": "Target KL value for adaptive KL control in PPO training."}
)
dpo_beta: Optional[float] = field(
default=0.1,