From f8cf0d5e5df91189db7dc6effb57c7ba2df0224c Mon Sep 17 00:00:00 2001
From: hiyouga <467089858@qq.com>
Date: Mon, 27 May 2024 19:56:04 +0800
Subject: [PATCH] update dpo examples

Former-commit-id: 69e32a7cb6336ca9a953c379ec794818b3f169bd
---
 examples/lora_single_gpu/llama3_lora_dpo.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/lora_single_gpu/llama3_lora_dpo.yaml b/examples/lora_single_gpu/llama3_lora_dpo.yaml
index 958be1b5..f68244b7 100644
--- a/examples/lora_single_gpu/llama3_lora_dpo.yaml
+++ b/examples/lora_single_gpu/llama3_lora_dpo.yaml
@@ -6,6 +6,7 @@ stage: dpo
 do_train: true
 finetuning_type: lora
 lora_target: q_proj,v_proj
+pref_beta: 0.1
 pref_loss: sigmoid # [sigmoid (dpo), orpo, simpo]
 
 ### dataset