Mirror of https://github.com/hiyouga/LLaMA-Factory.git, synced 2025-12-16 20:00:36 +08:00
fix ppo args
@@ -57,7 +57,15 @@ class FinetuningArguments:
     )
     ppo_score_norm: Optional[bool] = field(
         default=False,
-        metadata={"help": "Use score normalization in PPO Training."}
+        metadata={"help": "Use score normalization in PPO training."}
     )
+    ppo_logger: Optional[str] = field(
+        default=None,
+        metadata={"help": "Log with either 'wandb' or 'tensorboard' in PPO training."}
+    )
+    ppo_target: Optional[float] = field(
+        default=6.0,
+        metadata={"help": "Target KL value for adaptive KL control in PPO training."}
+    )
     dpo_beta: Optional[float] = field(
         default=0.1,
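For context, a minimal sketch of how arguments like these could be forwarded to trl's PPOConfig when building the PPO trainer. The exact wiring is not part of this diff; the parameter names `target`, `log_with`, and `use_score_norm`, the `finetuning_args` object, and the helper `build_ppo_config` are assumptions for illustration only.

    # Sketch only: assumes trl's PPOConfig accepts `target`, `log_with`, and
    # `use_score_norm`, and that `finetuning_args` is a FinetuningArguments
    # instance carrying the fields added in this commit.
    from trl import PPOConfig

    def build_ppo_config(finetuning_args, learning_rate: float = 1e-5) -> PPOConfig:
        return PPOConfig(
            learning_rate=learning_rate,
            target=finetuning_args.ppo_target,              # target KL for adaptive KL control
            log_with=finetuning_args.ppo_logger,            # "wandb", "tensorboard", or None
            use_score_norm=finetuning_args.ppo_score_norm,  # normalize reward scores
        )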