mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-12-14 19:06:26 +08:00
@@ -102,7 +102,7 @@ class RLHFArguments:
|
||||
default="sigmoid",
|
||||
metadata={"help": "The type of DPO loss to use."},
|
||||
)
|
||||
dpo_label_smoothing = field(
|
||||
dpo_label_smoothing: float = field(
|
||||
default=0.0,
|
||||
metadata={"help": "The robust DPO label smoothing parameter in cDPO that should be between 0 and 0.5."},
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user