mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2026-03-07 04:05:58 +08:00
@@ -118,9 +118,9 @@ class RLHFArguments:
|
||||
default=None,
|
||||
metadata={"help": "The number of bits to quantize the reward model."}
|
||||
)
|
||||
reward_model_type: Optional[Literal["lora", "full"]] = field(
|
||||
reward_model_type: Optional[Literal["lora", "full", "api"]] = field(
|
||||
default="lora",
|
||||
metadata={"help": "The checkpoint type of the reward model. The lora type only supports lora training."}
|
||||
metadata={"help": "The type of the reward model in PPO training. Lora model only supports lora training."}
|
||||
)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user