update dpo examples

Former-commit-id: 30e1c8e7450a68ccabede245cbc09b416ef7a94d
This commit is contained in:
hiyouga 2024-05-27 19:56:04 +08:00
parent 51a1097c64
commit 710642827a

View File

@@ -6,6 +6,7 @@ stage: dpo
do_train: true
finetuning_type: lora
lora_target: q_proj,v_proj
pref_beta: 0.1
pref_loss: sigmoid # [sigmoid (dpo), orpo, simpo]
### dataset