implement rm server #1543

This commit is contained in:
hiyouga
2023-12-03 20:52:54 +08:00
parent 03d05991f8
commit 7df4f3ab20
11 changed files with 104 additions and 24 deletions

View File

@@ -118,9 +118,9 @@ class RLHFArguments:
default=None,
metadata={"help": "The number of bits to quantize the reward model."}
)
- reward_model_type: Optional[Literal["lora", "full"]] = field(
+ reward_model_type: Optional[Literal["lora", "full", "api"]] = field(
default="lora",
- metadata={"help": "The checkpoint type of the reward model. The lora type only supports lora training."}
+ metadata={"help": "The type of the reward model in PPO training. Lora model only supports lora training."}
)