Former-commit-id: 3164b4f11b72684c8aa2105037cb36c47b6acfd4
This commit is contained in:
hiyouga 2024-03-26 17:30:12 +08:00
parent ec94e5e876
commit c311375b50

View File

@@ -102,7 +102,7 @@ class RLHFArguments:
default="sigmoid", default="sigmoid",
metadata={"help": "The type of DPO loss to use."}, metadata={"help": "The type of DPO loss to use."},
) )
dpo_label_smoothing = field( dpo_label_smoothing: float = field(
default=0.0, default=0.0,
metadata={"help": "The robust DPO label smoothing parameter in cDPO that should be between 0 and 0.5."}, metadata={"help": "The robust DPO label smoothing parameter in cDPO that should be between 0 and 0.5."},
) )