mirror of https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-08-04 12:42:51 +08:00

Update finetuning_args.py

Former-commit-id: b24635d22b3084ad29217ef55c1dd1fa4f85a1fb
parent 0d2262ffd0 · commit ec910a87c0
@@ -74,6 +74,10 @@ class RLHFArguments:
         default=None,
         metadata={"help": "Log with either 'wandb' or 'tensorboard' in PPO training."}
     )
+    ppo_epochs: Optional[int] = field(
+        default=4,
+        metadata={"help": "Number of optimisation epochs per batch of samples"},
+    )
     ppo_score_norm: Optional[bool] = field(
         default=False,
         metadata={"help": "Use score normalization in PPO training."}
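Applied, the hunk leaves the class in the shape sketched below. This is a minimal runnable reconstruction for context only: the real RLHFArguments defines further fields, and the name ppo_logger for the first field is an assumption, since the hunk shows only that field's body.

from dataclasses import dataclass, field
from typing import Optional

@dataclass
class RLHFArguments:
    # Minimal sketch of the region touched by this commit; the real class
    # declares additional PPO/RLHF options before and after these fields.
    ppo_logger: Optional[str] = field(  # field name assumed, not shown in the hunk
        default=None,
        metadata={"help": "Log with either 'wandb' or 'tensorboard' in PPO training."},
    )
    ppo_epochs: Optional[int] = field(  # the field this commit adds
        default=4,
        metadata={"help": "Number of optimisation epochs per batch of samples"},
    )
    ppo_score_norm: Optional[bool] = field(
        default=False,
        metadata={"help": "Use score normalization in PPO training."},
    )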
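For context on where the new argument ends up: TRL releases contemporary with this commit expose a ppo_epochs option on PPOConfig, controlling how many optimisation passes PPO makes over each sampled batch of rollouts, alongside a log_with option matching the logger help text above. The sketch below is hypothetical wiring under those assumptions, not code from this repository; build_ppo_config is an invented helper name.

from trl import PPOConfig

def build_ppo_config(args: RLHFArguments) -> PPOConfig:
    # Each batch of rollouts is reused for args.ppo_epochs optimisation
    # passes before fresh samples are generated.
    return PPOConfig(
        ppo_epochs=args.ppo_epochs,
        log_with=args.ppo_logger,  # 'wandb' or 'tensorboard', per the help text
    )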