Update examples: use `###` for section-header comments in the example YAML configs

Former-commit-id: ddec9e1b84
2026-03-06 19:56:01 +08:00 · 2024-05-17 01:02:00 +08:00
parent f4bf49e891
commit dfff5119b4
27 changed files with 155 additions and 155 deletions
--- a/examples/extras/badam/llama3_lora_sft.yaml
+++ b/examples/extras/badam/llama3_lora_sft.yaml
@@ -1,7 +1,7 @@
-# model
+### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct

-# method
+### method
 stage: sft
 do_train: true
 finetuning_type: full
@@ -10,7 +10,7 @@ badam_switch_mode: descending
 badam_switch_interval: 50
 badam_verbose: 2

-# dataset
+### dataset
 dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
@@ -18,14 +18,14 @@ max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16

-# output
+### output
 output_dir: saves/llama3-8b/full/sft
 logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true

-# train
+### train
 per_device_train_batch_size: 1
 gradient_accumulation_steps: 8
 learning_rate: 0.0001
@@ -34,7 +34,7 @@ lr_scheduler_type: cosine
 warmup_steps: 0.1
 pure_bf16: true

-# eval
+### eval
 val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
--- a/examples/extras/fsdp_qlora/llama3_lora_sft.yaml
+++ b/examples/extras/fsdp_qlora/llama3_lora_sft.yaml
@@ -1,17 +1,17 @@
-# model
+### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 quantization_bit: 4

-# method
+### method
 stage: sft
 do_train: true
 finetuning_type: lora
 lora_target: q_proj,v_proj

-# ddp
+### ddp
 ddp_timeout: 180000000

-# dataset
+### dataset
 dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
@@ -19,14 +19,14 @@ max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16

-# output
+### output
 output_dir: saves/llama3-8b/lora/sft
 logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true

-# train
+### train
 per_device_train_batch_size: 1
 gradient_accumulation_steps: 8
 learning_rate: 0.0001
@@ -35,7 +35,7 @@ lr_scheduler_type: cosine
 warmup_steps: 0.1
 fp16: true

-# eval
+### eval
 val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
--- a/examples/extras/galore/llama3_full_sft.yaml
+++ b/examples/extras/galore/llama3_full_sft.yaml
@@ -1,7 +1,7 @@
-# model
+### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct

-# method
+### method
 stage: sft
 do_train: true
 finetuning_type: full
@@ -11,7 +11,7 @@ galore_target: mlp,self_attn
 galore_rank: 128
 galore_scale: 2.0

-# dataset
+### dataset
 dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
@@ -19,14 +19,14 @@ max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16

-# output
+### output
 output_dir: saves/llama3-8b/full/sft
 logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true

-# train
+### train
 per_device_train_batch_size: 1
 gradient_accumulation_steps: 1
 learning_rate: 0.0001
@@ -35,7 +35,7 @@ lr_scheduler_type: cosine
 warmup_steps: 0.1
 pure_bf16: true

-# eval
+### eval
 val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
--- a/examples/extras/llama_pro/llama3_freeze_sft.yaml
+++ b/examples/extras/llama_pro/llama3_freeze_sft.yaml
@@ -1,7 +1,7 @@
-# model
+### model
 model_name_or_path: models/llama3-8b-instruct-pro

-# method
+### method
 stage: sft
 do_train: true
 finetuning_type: freeze
@@ -9,7 +9,7 @@ freeze_trainable_layers: 8
 freeze_trainable_modules: all
 use_llama_pro: true

-# dataset
+### dataset
 dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
@@ -17,14 +17,14 @@ max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16

-# output
+### output
 output_dir: saves/llama3-8b-instruct-pro/freeze/sft
 logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true

-# train
+### train
 per_device_train_batch_size: 1
 gradient_accumulation_steps: 8
 learning_rate: 0.0001
@@ -33,7 +33,7 @@ lr_scheduler_type: cosine
 warmup_steps: 0.1
 fp16: true

-# eval
+### eval
 val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
--- a/examples/extras/loraplus/llama3_lora_sft.yaml
+++ b/examples/extras/loraplus/llama3_lora_sft.yaml
@@ -1,14 +1,14 @@
-# model
+### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct

-# method
+### method
 stage: sft
 do_train: true
 finetuning_type: lora
 lora_target: q_proj,v_proj
 loraplus_lr_ratio: 16.0

-# dataset
+### dataset
 dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
@@ -16,14 +16,14 @@ max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16

-# output
+### output
 output_dir: saves/llama3-8b/lora/sft
 logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true

-# train
+### train
 per_device_train_batch_size: 1
 gradient_accumulation_steps: 8
 learning_rate: 0.0001
@@ -32,7 +32,7 @@ lr_scheduler_type: cosine
 warmup_steps: 0.1
 fp16: true

-# eval
+### eval
 val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
--- a/examples/extras/mod/llama3_full_sft.yaml
+++ b/examples/extras/mod/llama3_full_sft.yaml
@@ -1,13 +1,13 @@
-# model
+### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct

-# method
+### method
 stage: sft
 do_train: true
 finetuning_type: full
 mixture_of_depths: convert

-# dataset
+### dataset
 dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
@@ -15,14 +15,14 @@ max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16

-# output
+### output
 output_dir: saves/llama3-8b-mod/full/sft
 logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true

-# train
+### train
 per_device_train_batch_size: 1
 gradient_accumulation_steps: 8
 optim: paged_adamw_8bit
@@ -32,7 +32,7 @@ lr_scheduler_type: cosine
 warmup_steps: 0.1
 pure_bf16: true

-# eval
+### eval
 val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps