update dpo examples

Former-commit-id: 69e32a7cb6336ca9a953c379ec794818b3f169bd
This commit is contained in:
hiyouga 2024-05-27 19:56:04 +08:00
parent 9670f5e41a
commit f8cf0d5e5d

View File

@@ -6,6 +6,7 @@ stage: dpo
do_train: true do_train: true
finetuning_type: lora finetuning_type: lora
lora_target: q_proj,v_proj lora_target: q_proj,v_proj
pref_beta: 0.1
pref_loss: sigmoid # [sigmoid (dpo), orpo, simpo] pref_loss: sigmoid # [sigmoid (dpo), orpo, simpo]
### dataset ### dataset