improve KTO impl., replace datasets

Former-commit-id: c450ee87a3
2026-06-20 06:08:57 +08:00 · 2024-05-18 03:44:56 +08:00
parent 97469892c3
commit 13d7b48efe
66 changed files with 46444 additions and 28125 deletions
--- a/examples/lora_single_gpu/llama3_lora_dpo.yaml
+++ b/examples/lora_single_gpu/llama3_lora_dpo.yaml
@@ -9,7 +9,7 @@ lora_target: q_proj,v_proj
 dpo_ftx: 1.0

 ### dataset
-dataset: orca_rlhf
+dataset: dpo_en_demo
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
@@ -26,7 +26,7 @@ overwrite_output_dir: true
 ### train
 per_device_train_batch_size: 1
 gradient_accumulation_steps: 8
-learning_rate: 0.00001
+learning_rate: 0.000005
 num_train_epochs: 3.0
 lr_scheduler_type: cosine
 warmup_steps: 0.1
--- a/examples/lora_single_gpu/llama3_lora_kto.yaml
+++ b/examples/lora_single_gpu/llama3_lora_kto.yaml
@@ -0,0 +1,39 @@
+### model (example config: KTO LoRA fine-tuning of Llama-3-8B-Instruct)
+model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+
+### method
+stage: kto
+do_train: true
+finetuning_type: lora
+lora_target: q_proj,v_proj
+kto_ftx: 0.1  # NOTE(review): presumably the weight of an auxiliary SFT loss term; confirm
+
+### dataset
+dataset: kto_en_demo
+template: llama3
+cutoff_len: 1024
+max_samples: 1000
+overwrite_cache: true
+preprocessing_num_workers: 16
+
+### output
+output_dir: saves/llama3-8b/lora/kto
+logging_steps: 10
+save_steps: 500
+plot_loss: true
+overwrite_output_dir: true
+
+### train
+per_device_train_batch_size: 1
+gradient_accumulation_steps: 8
+learning_rate: 0.000005
+num_train_epochs: 3.0
+lr_scheduler_type: cosine
+warmup_ratio: 0.1  # fraction of total steps; warmup_steps takes an integer step count
+fp16: true
+
+### eval
+val_size: 0.1
+per_device_eval_batch_size: 1
+evaluation_strategy: steps
+eval_steps: 500
--- a/examples/lora_single_gpu/llama3_lora_orpo.yaml
+++ b/examples/lora_single_gpu/llama3_lora_orpo.yaml
@@ -8,7 +8,7 @@ finetuning_type: lora
 lora_target: q_proj,v_proj

 ### dataset
-dataset: orca_rlhf
+dataset: dpo_en_demo
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
@@ -25,7 +25,7 @@ overwrite_output_dir: true
 ### train
 per_device_train_batch_size: 1
 gradient_accumulation_steps: 8
-learning_rate: 0.00001
+learning_rate: 0.000005
 num_train_epochs: 3.0
 lr_scheduler_type: cosine
 warmup_steps: 0.1
--- a/examples/lora_single_gpu/llama3_lora_ppo.yaml
+++ b/examples/lora_single_gpu/llama3_lora_ppo.yaml
@@ -9,7 +9,7 @@ finetuning_type: lora
 lora_target: q_proj,v_proj

 ### dataset
-dataset: identity,alpaca_gpt4_en
+dataset: identity,alpaca_en_demo
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
--- a/examples/lora_single_gpu/llama3_lora_predict.yaml
+++ b/examples/lora_single_gpu/llama3_lora_predict.yaml
@@ -8,7 +8,7 @@ do_predict: true
 finetuning_type: lora

 ### dataset
-dataset: identity,alpaca_gpt4_en
+dataset: identity,alpaca_en_demo
 template: llama3
 cutoff_len: 1024
 max_samples: 50
--- a/examples/lora_single_gpu/llama3_lora_reward.yaml
+++ b/examples/lora_single_gpu/llama3_lora_reward.yaml
@@ -8,7 +8,7 @@ finetuning_type: lora
 lora_target: q_proj,v_proj

 ### dataset
-dataset: orca_rlhf
+dataset: dpo_en_demo
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
--- a/examples/lora_single_gpu/llama3_lora_sft.yaml
+++ b/examples/lora_single_gpu/llama3_lora_sft.yaml
@@ -8,7 +8,7 @@ finetuning_type: lora
 lora_target: q_proj,v_proj

 ### dataset
-dataset: identity,alpaca_gpt4_en
+dataset: identity,alpaca_en_demo
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
--- a/examples/lora_single_gpu/llama3_preprocess.yaml
+++ b/examples/lora_single_gpu/llama3_preprocess.yaml
@@ -8,7 +8,7 @@ finetuning_type: lora
 lora_target: q_proj,v_proj

 ### dataset
-dataset: identity,alpaca_gpt4_en
+dataset: identity,alpaca_en_demo
 template: llama3
 cutoff_len: 1024
 max_samples: 1000