From 92e24a73cb87e663bbf3beceffd1e8061c713490 Mon Sep 17 00:00:00 2001
From: hiyouga
Date: Mon, 22 Apr 2024 00:35:41 +0800
Subject: [PATCH] remove extras

Former-commit-id: ddbd29d77702f7b82051d930e3eac1b47f5c6d35
---
 examples/extras/MoD/sft.sh          | 33 ------------------------
 examples/extras/badam/sft.sh        | 35 -------------------------
 examples/extras/fsdp_qlora/sft.sh   | 40 -----------------------------
 examples/extras/galore/sft.sh       | 36 --------------------------
 examples/extras/llama_pro/expand.sh |  6 -----
 examples/extras/llama_pro/sft.sh    | 34 ------------------------
 examples/extras/loraplus/sft.sh     | 33 ------------------------
 7 files changed, 217 deletions(-)
 delete mode 100644 examples/extras/MoD/sft.sh
 delete mode 100644 examples/extras/badam/sft.sh
 delete mode 100644 examples/extras/fsdp_qlora/sft.sh
 delete mode 100644 examples/extras/galore/sft.sh
 delete mode 100644 examples/extras/llama_pro/expand.sh
 delete mode 100644 examples/extras/llama_pro/sft.sh
 delete mode 100644 examples/extras/loraplus/sft.sh

diff --git a/examples/extras/MoD/sft.sh b/examples/extras/MoD/sft.sh
deleted file mode 100644
index 2c8f04a3..00000000
--- a/examples/extras/MoD/sft.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/bin/bash
-
-CUDA_VISIBLE_DEVICES=0 python ../../../src/train_bash.py \
-    --stage sft \
-    --do_train \
-    --model_name_or_path meta-llama/Llama-2-7b-hf \
-    --dataset alpaca_gpt4_en,glaive_toolcall \
-    --dataset_dir ../../../data \
-    --template default \
-    --finetuning_type full \
-    --mixture_of_depths convert \
-    --output_dir ../../../saves/LLaMA2-7B/mod/sft \
-    --overwrite_cache \
-    --overwrite_output_dir \
-    --cutoff_len 1024 \
-    --preprocessing_num_workers 16 \
-    --per_device_train_batch_size 1 \
-    --per_device_eval_batch_size 1 \
-    --gradient_accumulation_steps 8 \
-    --optim paged_adamw_8bit \
-    --lr_scheduler_type cosine \
-    --logging_steps 10 \
-    --warmup_steps 20 \
-    --save_steps 100 \
-    --eval_steps 100 \
-    --evaluation_strategy steps \
-    --load_best_model_at_end \
-    --learning_rate 5e-5 \
-    --num_train_epochs 3.0 \
-    --max_samples 3000 \
-    --val_size 0.1 \
-    --plot_loss \
-    --pure_bf16
diff --git a/examples/extras/badam/sft.sh b/examples/extras/badam/sft.sh
deleted file mode 100644
index c2319caa..00000000
--- a/examples/extras/badam/sft.sh
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/bin/bash
-
-CUDA_VISIBLE_DEVICES=0 python ../../../src/train_bash.py \
-    --stage sft \
-    --do_train \
-    --model_name_or_path meta-llama/Llama-2-7b-hf \
-    --dataset alpaca_gpt4_en,glaive_toolcall \
-    --dataset_dir ../../../data \
-    --template default \
-    --finetuning_type full \
-    --use_badam \
-    --badam_switch_mode descending \
-    --badam_switch_block_every 50 \
-    --badam_verbose 2 \
-    --output_dir ../../../saves/LLaMA2-7B/badam/sft \
-    --overwrite_cache \
-    --overwrite_output_dir \
-    --cutoff_len 1024 \
-    --preprocessing_num_workers 16 \
-    --per_device_train_batch_size 1 \
-    --per_device_eval_batch_size 1 \
-    --gradient_accumulation_steps 8 \
-    --lr_scheduler_type cosine \
-    --logging_steps 10 \
-    --warmup_steps 20 \
-    --save_steps 100 \
-    --eval_steps 100 \
-    --evaluation_strategy steps \
-    --load_best_model_at_end \
-    --learning_rate 5e-5 \
-    --num_train_epochs 3.0 \
-    --max_samples 3000 \
-    --val_size 0.1 \
-    --plot_loss \
-    --pure_bf16
diff --git a/examples/extras/fsdp_qlora/sft.sh b/examples/extras/fsdp_qlora/sft.sh
deleted file mode 100644
index 614245d3..00000000
--- a/examples/extras/fsdp_qlora/sft.sh
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/bin/bash
-
-pip install "transformers>=4.39.1"
-pip install "accelerate>=0.28.0"
-pip install "bitsandbytes>=0.43.0"
-
-CUDA_VISIBLE_DEVICES=0,1 accelerate launch \
-    --config_file ../../accelerate/fsdp_config.yaml \
-    ../../../src/train_bash.py \
-    --stage sft \
-    --do_train \
-    --model_name_or_path meta-llama/Llama-2-70b-hf \
-    --dataset alpaca_gpt4_en,glaive_toolcall \
-    --dataset_dir ../../../data \
-    --template default \
-    --finetuning_type lora \
-    --lora_target q_proj,v_proj \
-    --output_dir ../../../saves/LLaMA2-70B/lora/sft \
-    --overwrite_cache \
-    --overwrite_output_dir \
-    --cutoff_len 1024 \
-    --preprocessing_num_workers 16 \
-    --per_device_train_batch_size 1 \
-    --per_device_eval_batch_size 1 \
-    --gradient_accumulation_steps 4 \
-    --lr_scheduler_type cosine \
-    --logging_steps 10 \
-    --warmup_steps 20 \
-    --save_steps 100 \
-    --eval_steps 100 \
-    --evaluation_strategy steps \
-    --load_best_model_at_end \
-    --learning_rate 5e-5 \
-    --num_train_epochs 3.0 \
-    --max_samples 3000 \
-    --val_size 0.1 \
-    --ddp_timeout 180000000 \
-    --quantization_bit 4 \
-    --plot_loss \
-    --fp16
diff --git a/examples/extras/galore/sft.sh b/examples/extras/galore/sft.sh
deleted file mode 100644
index da1779ed..00000000
--- a/examples/extras/galore/sft.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/bash
-
-CUDA_VISIBLE_DEVICES=0 python ../../../src/train_bash.py \
-    --stage sft \
-    --do_train \
-    --model_name_or_path meta-llama/Llama-2-7b-hf \
-    --dataset alpaca_gpt4_en,glaive_toolcall \
-    --dataset_dir ../../../data \
-    --template default \
-    --finetuning_type full \
-    --use_galore \
-    --galore_layerwise \
-    --galore_target mlp,self_attn \
-    --galore_rank 128 \
-    --galore_scale 2.0 \
-    --output_dir ../../../saves/LLaMA2-7B/galore/sft \
-    --overwrite_cache \
-    --overwrite_output_dir \
-    --cutoff_len 1024 \
-    --preprocessing_num_workers 16 \
-    --per_device_train_batch_size 1 \
-    --per_device_eval_batch_size 1 \
-    --gradient_accumulation_steps 1 \
-    --lr_scheduler_type cosine \
-    --logging_steps 10 \
-    --warmup_steps 20 \
-    --save_steps 100 \
-    --eval_steps 100 \
-    --evaluation_strategy steps \
-    --load_best_model_at_end \
-    --learning_rate 5e-5 \
-    --num_train_epochs 3.0 \
-    --max_samples 3000 \
-    --val_size 0.1 \
-    --plot_loss \
-    --pure_bf16
diff --git a/examples/extras/llama_pro/expand.sh b/examples/extras/llama_pro/expand.sh
deleted file mode 100644
index b260902c..00000000
--- a/examples/extras/llama_pro/expand.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/bash
-
-python ../../../scripts/llama_pro.py \
-    --model_name_or_path meta-llama/Llama-2-7b-hf \
-    --output_dir ../../../models/llama2-7b-pro \
-    --num_expand 8
diff --git a/examples/extras/llama_pro/sft.sh b/examples/extras/llama_pro/sft.sh
deleted file mode 100644
index 573078ff..00000000
--- a/examples/extras/llama_pro/sft.sh
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/bin/bash
-
-CUDA_VISIBLE_DEVICES=0 python ../../../src/train_bash.py \
-    --stage sft \
-    --do_train \
-    --model_name_or_path ../../../models/llama2-7b-pro \
-    --dataset alpaca_gpt4_en,glaive_toolcall \
-    --dataset_dir ../../../data \
-    --template default \
-    --finetuning_type freeze \
-    --name_module_trainable all \
-    --num_layer_trainable 8 \
-    --use_llama_pro \
-    --output_dir ../../../saves/LLaMA2-7B-Pro/lora/sft \
-    --overwrite_cache \
-    --overwrite_output_dir \
-    --cutoff_len 1024 \
-    --preprocessing_num_workers 16 \
-    --per_device_train_batch_size 1 \
-    --per_device_eval_batch_size 1 \
-    --gradient_accumulation_steps 8 \
-    --lr_scheduler_type cosine \
-    --logging_steps 10 \
-    --warmup_steps 20 \
-    --save_steps 100 \
-    --eval_steps 100 \
-    --evaluation_strategy steps \
-    --load_best_model_at_end \
-    --learning_rate 5e-5 \
-    --num_train_epochs 3.0 \
-    --max_samples 3000 \
-    --val_size 0.1 \
-    --plot_loss \
-    --fp16
diff --git a/examples/extras/loraplus/sft.sh b/examples/extras/loraplus/sft.sh
deleted file mode 100644
index cb334e7d..00000000
--- a/examples/extras/loraplus/sft.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/bin/bash
-
-CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
-    --stage sft \
-    --do_train \
-    --model_name_or_path meta-llama/Llama-2-7b-hf \
-    --dataset alpaca_gpt4_en,glaive_toolcall \
-    --dataset_dir ../../data \
-    --template default \
-    --finetuning_type lora \
-    --lora_target q_proj,v_proj \
-    --loraplus_lr_ratio 16.0 \
-    --output_dir ../../saves/LLaMA2-7B/loraplus/sft \
-    --overwrite_cache \
-    --overwrite_output_dir \
-    --cutoff_len 1024 \
-    --preprocessing_num_workers 16 \
-    --per_device_train_batch_size 1 \
-    --per_device_eval_batch_size 1 \
-    --gradient_accumulation_steps 8 \
-    --lr_scheduler_type cosine \
-    --logging_steps 10 \
-    --warmup_steps 20 \
-    --save_steps 100 \
-    --eval_steps 100 \
-    --evaluation_strategy steps \
-    --load_best_model_at_end \
-    --learning_rate 5e-5 \
-    --num_train_epochs 3.0 \
-    --max_samples 3000 \
-    --val_size 0.1 \
-    --plot_loss \
-    --fp16