update example docs

Former-commit-id: f02f87c6fb
This commit is contained in:
hiyouga
2024-05-06 22:51:02 +08:00
parent eb21a527a6
commit 92cafef325
35 changed files with 1048 additions and 594 deletions

View File

@@ -0,0 +1,41 @@
# Example config: full-parameter SFT of Meta-Llama-3-8B-Instruct with BAdam enabled.
# model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
# method
stage: sft
do_train: true
finetuning_type: full
use_badam: true
badam_switch_mode: descending
badam_switch_interval: 50
badam_verbose: 2
# dataset
dataset: identity,alpaca_gpt4_en
template: llama3
cutoff_len: 1024
max_samples: 1000
val_size: 0.1
overwrite_cache: true
preprocessing_num_workers: 16
# output
output_dir: saves/llama3-8b/full/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
# train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 0.0001
num_train_epochs: 3.0
lr_scheduler_type: cosine
# 0.1 is a fraction of the total training steps, so it goes in warmup_ratio;
# warmup_steps expects an absolute (integer) number of steps.
warmup_ratio: 0.1
pure_bf16: true
# eval
per_device_eval_batch_size: 1
evaluation_strategy: steps
eval_steps: 500

View File

@@ -1,35 +0,0 @@
#!/bin/bash
# Launches `llamafactory-cli train` for full-parameter SFT of Llama-2-7b with
# BAdam enabled, with only CUDA device 0 visible to the process.

# Arguments are kept in an array so each flag sits on its own line without
# needing backslash continuations.
train_args=(
    --stage sft
    --do_train
    --model_name_or_path meta-llama/Llama-2-7b-hf
    --dataset alpaca_gpt4_en,glaive_toolcall
    --dataset_dir ../../../data
    --template default
    --finetuning_type full
    --use_badam
    --badam_switch_mode descending
    --badam_switch_block_every 50
    --badam_verbose 2
    --output_dir ../../../saves/LLaMA2-7B/badam/sft
    --overwrite_cache
    --overwrite_output_dir
    --cutoff_len 1024
    --preprocessing_num_workers 16
    --per_device_train_batch_size 1
    --per_device_eval_batch_size 1
    --gradient_accumulation_steps 8
    --lr_scheduler_type cosine
    --logging_steps 10
    --warmup_steps 20
    --save_steps 100
    --eval_steps 100
    --evaluation_strategy steps
    --load_best_model_at_end
    --learning_rate 5e-5
    --num_train_epochs 3.0
    --max_samples 3000
    --val_size 0.1
    --plot_loss
    --pure_bf16
)

CUDA_VISIBLE_DEVICES=0 llamafactory-cli train "${train_args[@]}"

View File

@@ -0,0 +1,39 @@
# Example config: LoRA SFT of Meta-Llama-3-8B-Instruct loaded with 4-bit quantization.
# model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
quantization_bit: 4
# method
stage: sft
do_train: true
finetuning_type: lora
lora_target: q_proj,v_proj
# dataset
dataset: identity,alpaca_gpt4_en
template: llama3
cutoff_len: 1024
max_samples: 1000
val_size: 0.1
overwrite_cache: true
preprocessing_num_workers: 16
# output
output_dir: saves/llama3-8b/lora/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
# train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 0.0001
num_train_epochs: 3.0
lr_scheduler_type: cosine
# 0.1 is a fraction of the total training steps, so it goes in warmup_ratio;
# warmup_steps expects an absolute (integer) number of steps.
warmup_ratio: 0.1
fp16: true
# eval
per_device_eval_batch_size: 1
evaluation_strategy: steps
eval_steps: 500

View File

@@ -1,41 +0,0 @@
#!/bin/bash
# FSDP + QLoRA fine-tuning of Llama-2-70b on CUDA devices 0 and 1, started
# through `accelerate launch`.
# DO NOT use GPTQ/AWQ model in FSDP+QLoRA

# Install the minimum package versions one at a time, in the listed order.
for requirement in "transformers>=4.39.1" "accelerate>=0.28.0" "bitsandbytes>=0.43.0"; do
    pip install "$requirement"
done

# Options consumed by `accelerate launch` itself, followed by the entry script.
launcher_args=(
    --config_file ../../accelerate/fsdp_config.yaml
    ../../../src/train.py
)

# Options forwarded to the training entry script.
train_args=(
    --stage sft
    --do_train
    --model_name_or_path meta-llama/Llama-2-70b-hf
    --dataset alpaca_gpt4_en,glaive_toolcall
    --dataset_dir ../../../data
    --template default
    --finetuning_type lora
    --lora_target q_proj,v_proj
    --output_dir ../../../saves/LLaMA2-70B/lora/sft
    --overwrite_cache
    --overwrite_output_dir
    --cutoff_len 1024
    --preprocessing_num_workers 16
    --per_device_train_batch_size 1
    --per_device_eval_batch_size 1
    --gradient_accumulation_steps 4
    --lr_scheduler_type cosine
    --logging_steps 10
    --warmup_steps 20
    --save_steps 100
    --eval_steps 100
    --evaluation_strategy steps
    --load_best_model_at_end
    --learning_rate 5e-5
    --num_train_epochs 3.0
    --max_samples 3000
    --val_size 0.1
    --ddp_timeout 180000000
    --quantization_bit 4
    --plot_loss
    --fp16
)

CUDA_VISIBLE_DEVICES=0,1 accelerate launch "${launcher_args[@]}" "${train_args[@]}"

View File

@@ -0,0 +1,10 @@
#!/bin/bash
# Starts src/train.py through `accelerate launch` with the FSDP accelerate
# config and the llama3_lora_sft.yaml training config, on CUDA devices 0 and 1.
# DO NOT use GPTQ/AWQ model in FSDP+QLoRA

# Install the minimum package versions one at a time, in the listed order.
for requirement in "transformers>=4.39.1" "accelerate>=0.28.0" "bitsandbytes>=0.43.0"; do
    pip install "$requirement"
done

launch_args=(
    --config_file examples/accelerate/fsdp_config.yaml
    src/train.py examples/extras/fsdp_qlora/llama3_lora_sft.yaml
)

CUDA_VISIBLE_DEVICES=0,1 accelerate launch "${launch_args[@]}"

View File

@@ -0,0 +1,42 @@
# Example config: full-parameter SFT of Meta-Llama-3-8B-Instruct with GaLore enabled
# (layer-wise, targeting the mlp and self_attn modules).
# model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
# method
stage: sft
do_train: true
finetuning_type: full
use_galore: true
galore_layerwise: true
galore_target: mlp,self_attn
galore_rank: 128
galore_scale: 2.0
# dataset
dataset: identity,alpaca_gpt4_en
template: llama3
cutoff_len: 1024
max_samples: 1000
val_size: 0.1
overwrite_cache: true
preprocessing_num_workers: 16
# output
output_dir: saves/llama3-8b/full/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
# train
per_device_train_batch_size: 1
gradient_accumulation_steps: 1
learning_rate: 0.0001
num_train_epochs: 3.0
lr_scheduler_type: cosine
# 0.1 is a fraction of the total training steps, so it goes in warmup_ratio;
# warmup_steps expects an absolute (integer) number of steps.
warmup_ratio: 0.1
pure_bf16: true
# eval
per_device_eval_batch_size: 1
evaluation_strategy: steps
eval_steps: 500

View File

@@ -1,36 +0,0 @@
#!/bin/bash
# Launches `llamafactory-cli train` for full-parameter SFT of Llama-2-7b with
# layer-wise GaLore enabled, with only CUDA device 0 visible to the process.

# Arguments are kept in an array so each flag sits on its own line without
# needing backslash continuations.
train_args=(
    --stage sft
    --do_train
    --model_name_or_path meta-llama/Llama-2-7b-hf
    --dataset alpaca_gpt4_en,glaive_toolcall
    --dataset_dir ../../../data
    --template default
    --finetuning_type full
    --use_galore
    --galore_layerwise
    --galore_target mlp,self_attn
    --galore_rank 128
    --galore_scale 2.0
    --output_dir ../../../saves/LLaMA2-7B/galore/sft
    --overwrite_cache
    --overwrite_output_dir
    --cutoff_len 1024
    --preprocessing_num_workers 16
    --per_device_train_batch_size 1
    --per_device_eval_batch_size 1
    --gradient_accumulation_steps 1
    --lr_scheduler_type cosine
    --logging_steps 10
    --warmup_steps 20
    --save_steps 100
    --eval_steps 100
    --evaluation_strategy steps
    --load_best_model_at_end
    --learning_rate 5e-5
    --num_train_epochs 3.0
    --max_samples 3000
    --val_size 0.1
    --plot_loss
    --pure_bf16
)

CUDA_VISIBLE_DEVICES=0 llamafactory-cli train "${train_args[@]}"

View File

@@ -1,6 +1,6 @@
#!/bin/bash
# Runs llama_pro.py with a source model, an output directory and --num_expand 8.
# NOTE(review): this hunk appears to show both the pre-change invocation
# (../../../ paths, Llama-2-7b) and the post-change invocation (repo-root paths,
# Meta-Llama-3-8B-Instruct) without +/- markers; only one set of the three
# leading lines is expected in the real file — confirm before running.
python ../../../scripts/llama_pro.py \
--model_name_or_path meta-llama/Llama-2-7b-hf \
--output_dir ../../../models/llama2-7b-pro \
python scripts/llama_pro.py \
--model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct \
--output_dir models/llama3-8b-instruct-pro \
--num_expand 8

View File

@@ -0,0 +1,40 @@
# Example config: freeze-tuning SFT of the expanded model stored at
# models/llama3-8b-instruct-pro, with use_llama_pro enabled and 8 trainable layers.
# model
model_name_or_path: models/llama3-8b-instruct-pro
# method
stage: sft
do_train: true
finetuning_type: freeze
name_module_trainable: all
num_layer_trainable: 8
use_llama_pro: true
# dataset
dataset: identity,alpaca_gpt4_en
template: llama3
cutoff_len: 1024
max_samples: 1000
val_size: 0.1
overwrite_cache: true
preprocessing_num_workers: 16
# output
output_dir: saves/llama3-8b-instruct-pro/freeze/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
# train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 0.0001
num_train_epochs: 3.0
lr_scheduler_type: cosine
# 0.1 is a fraction of the total training steps, so it goes in warmup_ratio;
# warmup_steps expects an absolute (integer) number of steps.
warmup_ratio: 0.1
pure_bf16: true
# eval
per_device_eval_batch_size: 1
evaluation_strategy: steps
eval_steps: 500

View File

@@ -1,34 +0,0 @@
#!/bin/bash
# Launches `llamafactory-cli train` for freeze-tuning SFT of the expanded model
# at ../../../models/llama2-7b-pro with --use_llama_pro, with only CUDA device 0
# visible to the process.

# Arguments are kept in an array so each flag sits on its own line without
# needing backslash continuations.
train_args=(
    --stage sft
    --do_train
    --model_name_or_path ../../../models/llama2-7b-pro
    --dataset alpaca_gpt4_en,glaive_toolcall
    --dataset_dir ../../../data
    --template default
    --finetuning_type freeze
    --name_module_trainable all
    --num_layer_trainable 8
    --use_llama_pro
    --output_dir ../../../saves/LLaMA2-7B-Pro/lora/sft
    --overwrite_cache
    --overwrite_output_dir
    --cutoff_len 1024
    --preprocessing_num_workers 16
    --per_device_train_batch_size 1
    --per_device_eval_batch_size 1
    --gradient_accumulation_steps 8
    --lr_scheduler_type cosine
    --logging_steps 10
    --warmup_steps 20
    --save_steps 100
    --eval_steps 100
    --evaluation_strategy steps
    --load_best_model_at_end
    --learning_rate 5e-5
    --num_train_epochs 3.0
    --max_samples 3000
    --val_size 0.1
    --plot_loss
    --fp16
)

CUDA_VISIBLE_DEVICES=0 llamafactory-cli train "${train_args[@]}"

View File

@@ -0,0 +1,39 @@
# Example config: LoRA SFT of Meta-Llama-3-8B-Instruct with loraplus_lr_ratio set to 16.0.
# model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
# method
stage: sft
do_train: true
finetuning_type: lora
lora_target: q_proj,v_proj
loraplus_lr_ratio: 16.0
# dataset
dataset: identity,alpaca_gpt4_en
template: llama3
cutoff_len: 1024
max_samples: 1000
val_size: 0.1
overwrite_cache: true
preprocessing_num_workers: 16
# output
output_dir: saves/llama3-8b/lora/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
# train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 0.0001
num_train_epochs: 3.0
lr_scheduler_type: cosine
# 0.1 is a fraction of the total training steps, so it goes in warmup_ratio;
# warmup_steps expects an absolute (integer) number of steps.
warmup_ratio: 0.1
pure_bf16: true
# eval
per_device_eval_batch_size: 1
evaluation_strategy: steps
eval_steps: 500

View File

@@ -1,33 +0,0 @@
#!/bin/bash
# Launches `llamafactory-cli train` for LoRA SFT of Llama-2-7b with
# --loraplus_lr_ratio 16.0, with only CUDA device 0 visible to the process.

# Arguments are kept in an array so each flag sits on its own line without
# needing backslash continuations.
train_args=(
    --stage sft
    --do_train
    --model_name_or_path meta-llama/Llama-2-7b-hf
    --dataset alpaca_gpt4_en,glaive_toolcall
    --dataset_dir ../../data
    --template default
    --finetuning_type lora
    --lora_target q_proj,v_proj
    --loraplus_lr_ratio 16.0
    --output_dir ../../saves/LLaMA2-7B/loraplus/sft
    --overwrite_cache
    --overwrite_output_dir
    --cutoff_len 1024
    --preprocessing_num_workers 16
    --per_device_train_batch_size 1
    --per_device_eval_batch_size 1
    --gradient_accumulation_steps 8
    --lr_scheduler_type cosine
    --logging_steps 10
    --warmup_steps 20
    --save_steps 100
    --eval_steps 100
    --evaluation_strategy steps
    --load_best_model_at_end
    --learning_rate 5e-5
    --num_train_epochs 3.0
    --max_samples 3000
    --val_size 0.1
    --plot_loss
    --fp16
)

CUDA_VISIBLE_DEVICES=0 llamafactory-cli train "${train_args[@]}"

View File

@@ -0,0 +1,39 @@
# Example config: full-parameter SFT of Meta-Llama-3-8B-Instruct with
# mixture_of_depths set to convert, using the paged_adamw_8bit optimizer.
# model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
# method
stage: sft
do_train: true
finetuning_type: full
mixture_of_depths: convert
# dataset
dataset: identity,alpaca_gpt4_en
template: llama3
cutoff_len: 1024
max_samples: 1000
val_size: 0.1
overwrite_cache: true
preprocessing_num_workers: 16
# output
output_dir: saves/llama3-8b-mod/full/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
# train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
optim: paged_adamw_8bit
learning_rate: 0.0001
num_train_epochs: 3.0
lr_scheduler_type: cosine
# 0.1 is a fraction of the total training steps, so it goes in warmup_ratio;
# warmup_steps expects an absolute (integer) number of steps.
warmup_ratio: 0.1
pure_bf16: true
# eval
per_device_eval_batch_size: 1
evaluation_strategy: steps
eval_steps: 500

View File

@@ -1,33 +0,0 @@
#!/bin/bash
# Launches `llamafactory-cli train` for full-parameter SFT of Llama-2-7b with
# --mixture_of_depths convert and the paged_adamw_8bit optimizer, with only
# CUDA device 0 visible to the process.

# Arguments are kept in an array so each flag sits on its own line without
# needing backslash continuations.
train_args=(
    --stage sft
    --do_train
    --model_name_or_path meta-llama/Llama-2-7b-hf
    --dataset alpaca_gpt4_en,glaive_toolcall
    --dataset_dir ../../../data
    --template default
    --finetuning_type full
    --mixture_of_depths convert
    --output_dir ../../../saves/LLaMA2-7B/mod/sft
    --overwrite_cache
    --overwrite_output_dir
    --cutoff_len 1024
    --preprocessing_num_workers 16
    --per_device_train_batch_size 1
    --per_device_eval_batch_size 1
    --gradient_accumulation_steps 8
    --optim paged_adamw_8bit
    --lr_scheduler_type cosine
    --logging_steps 10
    --warmup_steps 20
    --save_steps 100
    --eval_steps 100
    --evaluation_strategy steps
    --load_best_model_at_end
    --learning_rate 5e-5
    --num_train_epochs 3.0
    --max_samples 3000
    --val_size 0.1
    --plot_loss
    --pure_bf16
)

CUDA_VISIBLE_DEVICES=0 llamafactory-cli train "${train_args[@]}"