mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-11-05 10:22:15 +08:00
update dpo examples
Former-commit-id: 69e32a7cb6336ca9a953c379ec794818b3f169bd
This commit is contained in:
parent
9e87ea0cb7
commit
c12d99ea4e
@ -6,6 +6,7 @@ stage: dpo
|
|||||||
do_train: true
|
do_train: true
|
||||||
finetuning_type: lora
|
finetuning_type: lora
|
||||||
lora_target: q_proj,v_proj
|
lora_target: q_proj,v_proj
|
||||||
|
pref_beta: 0.1
|
||||||
pref_loss: sigmoid # [sigmoid (dpo), orpo, simpo]
|
pref_loss: sigmoid # [sigmoid (dpo), orpo, simpo]
|
||||||
|
|
||||||
### dataset
|
### dataset
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user