update examples: use "### ..." section headers in the example configs

Former-commit-id: ddec9e1b84
Author: hiyouga
Date: 2024-05-17 01:02:00 +08:00
parent f4bf49e891
commit dfff5119b4
27 changed files with 155 additions and 155 deletions

View File

@@ -1,7 +1,7 @@
-# model
+### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 
-# method
+### method
 stage: sft
 do_train: true
 finetuning_type: full
@@ -10,7 +10,7 @@ badam_switch_mode: descending
 badam_switch_interval: 50
 badam_verbose: 2
 
-# dataset
+### dataset
 dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
@@ -18,14 +18,14 @@ max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
 
-# output
+### output
 output_dir: saves/llama3-8b/full/sft
 logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 
-# train
+### train
 per_device_train_batch_size: 1
 gradient_accumulation_steps: 8
 learning_rate: 0.0001
@@ -34,7 +34,7 @@ lr_scheduler_type: cosine
 warmup_steps: 0.1
 pure_bf16: true
 
-# eval
+### eval
 val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
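
For orientation, a minimal sketch of the method section this BAdam example configures, in the new "###" header style. The badam_switch_* and badam_verbose values are copied from the hunks above; use_badam: true is an assumed enabling flag that does not appear in this excerpt.

### method
stage: sft
do_train: true
finetuning_type: full
use_badam: true                # assumed flag that switches on BAdam block-wise optimization
badam_switch_mode: descending  # order used when selecting the next active parameter block
badam_switch_interval: 50      # switch the active block every 50 optimizer steps
badam_verbose: 2               # verbosity of block-switching logs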

View File

@@ -1,17 +1,17 @@
-# model
+### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 quantization_bit: 4
 
-# method
+### method
 stage: sft
 do_train: true
 finetuning_type: lora
 lora_target: q_proj,v_proj
 
-# ddp
+### ddp
 ddp_timeout: 180000000
 
-# dataset
+### dataset
 dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
@@ -19,14 +19,14 @@ max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
 
-# output
+### output
 output_dir: saves/llama3-8b/lora/sft
 logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 
-# train
+### train
 per_device_train_batch_size: 1
 gradient_accumulation_steps: 8
 learning_rate: 0.0001
@@ -35,7 +35,7 @@ lr_scheduler_type: cosine
 warmup_steps: 0.1
 fp16: true
 
-# eval
+### eval
 val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
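
The example above pairs 4-bit quantization of the frozen base model with LoRA adapters (QLoRA-style training). A minimal sketch of just those keys, copied from the hunks above, with comments added for illustration:

### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
quantization_bit: 4          # load the base model quantized to 4 bits; only the adapters stay in higher precision

### method
finetuning_type: lora        # train low-rank adapters instead of the full weight matrices
lora_target: q_proj,v_proj   # attach LoRA to the attention query and value projections

### ddp
ddp_timeout: 180000000       # generous timeout (in seconds) for distributed process-group operations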

View File

@@ -1,7 +1,7 @@
-# model
+### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 
-# method
+### method
 stage: sft
 do_train: true
 finetuning_type: full
@@ -11,7 +11,7 @@ galore_target: mlp,self_attn
 galore_rank: 128
 galore_scale: 2.0
 
-# dataset
+### dataset
 dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
@@ -19,14 +19,14 @@ max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
 
-# output
+### output
 output_dir: saves/llama3-8b/full/sft
 logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 
-# train
+### train
 per_device_train_batch_size: 1
 gradient_accumulation_steps: 1
 learning_rate: 0.0001
@@ -35,7 +35,7 @@ lr_scheduler_type: cosine
 warmup_steps: 0.1
 pure_bf16: true
 
-# eval
+### eval
 val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
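
This example fine-tunes the full model while projecting gradients into a low-rank subspace with GaLore. The galore_* keys below are copied from the hunks above; use_galore: true is an assumed enabling flag that is not visible in this excerpt. Note that the train block keeps gradient_accumulation_steps at 1.

### method
finetuning_type: full
use_galore: true              # assumed flag that switches on GaLore
galore_target: mlp,self_attn  # modules whose gradients are projected
galore_rank: 128              # rank of the gradient subspace
galore_scale: 2.0             # scale applied to the projected updates

### train
gradient_accumulation_steps: 1  # no gradient accumulation in this example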

View File

@@ -1,7 +1,7 @@
-# model
+### model
 model_name_or_path: models/llama3-8b-instruct-pro
 
-# method
+### method
 stage: sft
 do_train: true
 finetuning_type: freeze
@@ -9,7 +9,7 @@ freeze_trainable_layers: 8
 freeze_trainable_modules: all
 use_llama_pro: true
 
-# dataset
+### dataset
 dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
@@ -17,14 +17,14 @@ max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
 
-# output
+### output
 output_dir: saves/llama3-8b-instruct-pro/freeze/sft
 logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 
-# train
+### train
 per_device_train_batch_size: 1
 gradient_accumulation_steps: 8
 learning_rate: 0.0001
@@ -33,7 +33,7 @@ lr_scheduler_type: cosine
 warmup_steps: 0.1
 fp16: true
 
-# eval
+### eval
 val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
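
This example freeze-tunes an expanded LLaMA-Pro-style checkpoint (models/llama3-8b-instruct-pro), training only a small number of layers. A minimal sketch of the relevant keys, copied from the hunks above, with comments added for illustration:

### model
model_name_or_path: models/llama3-8b-instruct-pro  # locally expanded model, path as given in the example

### method
finetuning_type: freeze        # keep most of the network frozen
freeze_trainable_layers: 8     # number of layers left trainable
freeze_trainable_modules: all  # train every module inside those layers
use_llama_pro: true            # restrict training to the newly expanded blocks, as in LLaMA Pro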

View File

@@ -1,14 +1,14 @@
-# model
+### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 
-# method
+### method
 stage: sft
 do_train: true
 finetuning_type: lora
 lora_target: q_proj,v_proj
 loraplus_lr_ratio: 16.0
 
-# dataset
+### dataset
 dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
@@ -16,14 +16,14 @@ max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
 
-# output
+### output
 output_dir: saves/llama3-8b/lora/sft
 logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 
-# train
+### train
 per_device_train_batch_size: 1
 gradient_accumulation_steps: 8
 learning_rate: 0.0001
@@ -32,7 +32,7 @@ lr_scheduler_type: cosine
 warmup_steps: 0.1
 fp16: true
 
-# eval
+### eval
 val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
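
This example enables LoRA+ through loraplus_lr_ratio, which scales the learning rate of the LoRA B matrices relative to the A matrices. A minimal sketch of the keys involved, copied from the hunks above; the interpretation of the ratio follows the LoRA+ recipe and is an assumption rather than something stated in this excerpt:

### method
finetuning_type: lora
lora_target: q_proj,v_proj   # attach LoRA to the attention query and value projections
loraplus_lr_ratio: 16.0      # assumed: B matrices train with 16x the base learning rate

### train
learning_rate: 0.0001        # base learning rate used for the A matrices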

View File

@@ -1,13 +1,13 @@
-# model
+### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 
-# method
+### method
 stage: sft
 do_train: true
 finetuning_type: full
 mixture_of_depths: convert
 
-# dataset
+### dataset
 dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
@@ -15,14 +15,14 @@ max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
 
-# output
+### output
 output_dir: saves/llama3-8b-mod/full/sft
 logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 
-# train
+### train
 per_device_train_batch_size: 1
 gradient_accumulation_steps: 8
 optim: paged_adamw_8bit
@@ -32,7 +32,7 @@ lr_scheduler_type: cosine
 warmup_steps: 0.1
 pure_bf16: true
 
-# eval
+### eval
 val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
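
This example converts the dense checkpoint into a Mixture-of-Depths model and then runs full fine-tuning with a paged 8-bit AdamW optimizer. A minimal sketch of the distinctive keys, copied from the hunks above, with comments added for illustration:

### method
finetuning_type: full
mixture_of_depths: convert   # convert the dense model to Mixture-of-Depths before training

### train
optim: paged_adamw_8bit      # paged 8-bit AdamW to cut optimizer memory
pure_bf16: true              # train in pure bfloat16 (no mixed-precision autocast)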