mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-08-22 21:52:51 +08:00
update dpo examples
Former-commit-id: 30e1c8e7450a68ccabede245cbc09b416ef7a94d
This commit is contained in:
parent
51a1097c64
commit
710642827a
@@ -6,6 +6,7 @@ stage: dpo
|
||||
do_train: true
|
||||
finetuning_type: lora
|
||||
lora_target: q_proj,v_proj
|
||||
pref_beta: 0.1
|
||||
pref_loss: sigmoid # [sigmoid (dpo), orpo, simpo]
|
||||
|
||||
### dataset
|
||||
|
Loading…
x
Reference in New Issue
Block a user