From ccec17f7731fcdbfcf48786e8994590c5afc9a47 Mon Sep 17 00:00:00 2001 From: hiyouga Date: Fri, 8 Mar 2024 20:41:43 +0800 Subject: [PATCH] fix example params Former-commit-id: 8a45213440ffc960947dd69ecf3b092aa724bef3 --- examples/extras/galore/adamw.sh | 2 +- examples/extras/galore/adamw_8bit_bf16.sh | 2 +- examples/extras/galore/galore_adamw.sh | 2 +- examples/extras/galore/galore_adamw_8bit_bf16.sh | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/extras/galore/adamw.sh b/examples/extras/galore/adamw.sh index cad03879..1fd2aaf0 100644 --- a/examples/extras/galore/adamw.sh +++ b/examples/extras/galore/adamw.sh @@ -15,7 +15,7 @@ CUDA_VISIBLE_DEVICES=0 python ../../../src/train_bash.py \ --preprocessing_num_workers 16 \ --per_device_train_batch_size 1 \ --per_device_eval_batch_size 1 \ - --gradient_accumulation_steps 8 \ + --gradient_accumulation_steps 2 \ --lr_scheduler_type cosine \ --logging_steps 10 \ --warmup_steps 20 \ diff --git a/examples/extras/galore/adamw_8bit_bf16.sh b/examples/extras/galore/adamw_8bit_bf16.sh index 9599bf00..01f4e8de 100644 --- a/examples/extras/galore/adamw_8bit_bf16.sh +++ b/examples/extras/galore/adamw_8bit_bf16.sh @@ -16,7 +16,7 @@ CUDA_VISIBLE_DEVICES=0 python ../../../src/train_bash.py \ --preprocessing_num_workers 16 \ --per_device_train_batch_size 1 \ --per_device_eval_batch_size 1 \ - --gradient_accumulation_steps 8 \ + --gradient_accumulation_steps 2 \ --lr_scheduler_type cosine \ --logging_steps 10 \ --warmup_steps 20 \ diff --git a/examples/extras/galore/galore_adamw.sh b/examples/extras/galore/galore_adamw.sh index 28ce72bb..83be6a51 100644 --- a/examples/extras/galore/galore_adamw.sh +++ b/examples/extras/galore/galore_adamw.sh @@ -18,7 +18,7 @@ CUDA_VISIBLE_DEVICES=0 python ../../../src/train_bash.py \ --preprocessing_num_workers 16 \ --per_device_train_batch_size 1 \ --per_device_eval_batch_size 1 \ - --gradient_accumulation_steps 8 \ + --gradient_accumulation_steps 2 \ --lr_scheduler_type cosine \ --logging_steps 10 \ --warmup_steps 20 \ diff --git a/examples/extras/galore/galore_adamw_8bit_bf16.sh b/examples/extras/galore/galore_adamw_8bit_bf16.sh index 0578856c..ddddcb33 100644 --- a/examples/extras/galore/galore_adamw_8bit_bf16.sh +++ b/examples/extras/galore/galore_adamw_8bit_bf16.sh @@ -10,7 +10,7 @@ CUDA_VISIBLE_DEVICES=0 python ../../../src/train_bash.py \ --finetuning_type full \ --use_galore \ --galore_target mlp,self_attn \ - --galore_rank 32 \ + --galore_rank 16 \ --optim adamw_8bit \ --output_dir ../../../saves/LLaMA2-7B/galore/sft \ --overwrite_cache \ @@ -19,7 +19,7 @@ CUDA_VISIBLE_DEVICES=0 python ../../../src/train_bash.py \ --preprocessing_num_workers 16 \ --per_device_train_batch_size 1 \ --per_device_eval_batch_size 1 \ - --gradient_accumulation_steps 8 \ + --gradient_accumulation_steps 2 \ --lr_scheduler_type cosine \ --logging_steps 10 \ --warmup_steps 20 \