From eb363b04b9a3c0a94ad0fb33c5e79d7bb671bfcc Mon Sep 17 00:00:00 2001 From: hiyouga Date: Sat, 9 Mar 2024 02:30:37 +0800 Subject: [PATCH] update examples Former-commit-id: 4c00bcdcaeb675c9fdb3e977c27c3604d7895ae2 --- examples/extras/llama_pro/expand.sh | 2 -- examples/extras/llama_pro/sft.sh | 1 + examples/full_multi_gpu/multi_node.sh | 1 + examples/full_multi_gpu/single_node.sh | 1 + examples/lora_multi_gpu/multi_node.sh | 1 + examples/lora_multi_gpu/single_node.sh | 1 + 6 files changed, 5 insertions(+), 2 deletions(-) diff --git a/examples/extras/llama_pro/expand.sh b/examples/extras/llama_pro/expand.sh index 9d99edea..b260902c 100644 --- a/examples/extras/llama_pro/expand.sh +++ b/examples/extras/llama_pro/expand.sh @@ -1,7 +1,5 @@ #!/bin/bash -pip install -e ../../../. - python ../../../scripts/llama_pro.py \ --model_name_or_path meta-llama/Llama-2-7b-hf \ --output_dir ../../../models/llama2-7b-pro \ diff --git a/examples/extras/llama_pro/sft.sh b/examples/extras/llama_pro/sft.sh index 712f2bcb..573078ff 100644 --- a/examples/extras/llama_pro/sft.sh +++ b/examples/extras/llama_pro/sft.sh @@ -10,6 +10,7 @@ CUDA_VISIBLE_DEVICES=0 python ../../../src/train_bash.py \ --finetuning_type freeze \ --name_module_trainable all \ --num_layer_trainable 8 \ + --use_llama_pro \ --output_dir ../../../saves/LLaMA2-7B-Pro/lora/sft \ --overwrite_cache \ --overwrite_output_dir \ diff --git a/examples/full_multi_gpu/multi_node.sh b/examples/full_multi_gpu/multi_node.sh index 392d717b..56509225 100644 --- a/examples/full_multi_gpu/multi_node.sh +++ b/examples/full_multi_gpu/multi_node.sh @@ -33,5 +33,6 @@ python -m torch.distributed.run \ --num_train_epochs 3.0 \ --max_samples 3000 \ --val_size 0.1 \ + --ddp_timeout 1800000 \ --plot_loss \ --fp16 diff --git a/examples/full_multi_gpu/single_node.sh b/examples/full_multi_gpu/single_node.sh index c748420f..0502e7f1 100644 --- a/examples/full_multi_gpu/single_node.sh +++ b/examples/full_multi_gpu/single_node.sh @@ -27,5 +27,6 @@ deepspeed --num_gpus 4 ../../src/train_bash.py \ --num_train_epochs 3.0 \ --max_samples 3000 \ --val_size 0.1 \ + --ddp_timeout 1800000 \ --plot_loss \ --fp16 diff --git a/examples/lora_multi_gpu/multi_node.sh b/examples/lora_multi_gpu/multi_node.sh index 1ac61590..f538c16a 100644 --- a/examples/lora_multi_gpu/multi_node.sh +++ b/examples/lora_multi_gpu/multi_node.sh @@ -30,5 +30,6 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch \ --num_train_epochs 3.0 \ --max_samples 3000 \ --val_size 0.1 \ + --ddp_timeout 1800000 \ --plot_loss \ --fp16 diff --git a/examples/lora_multi_gpu/single_node.sh b/examples/lora_multi_gpu/single_node.sh index 104535d0..fef7864b 100644 --- a/examples/lora_multi_gpu/single_node.sh +++ b/examples/lora_multi_gpu/single_node.sh @@ -30,5 +30,6 @@ CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 accelerate launch \ --num_train_epochs 3.0 \ --max_samples 3000 \ --val_size 0.1 \ + --ddp_timeout 1800000 \ --plot_loss \ --fp16