diff --git a/examples/extras/llama_pro/expand.sh b/examples/extras/llama_pro/expand.sh
index 9d99edea..b260902c 100644
--- a/examples/extras/llama_pro/expand.sh
+++ b/examples/extras/llama_pro/expand.sh
@@ -1,7 +1,5 @@
 #!/bin/bash
 
-pip install -e ../../../.
-
 python ../../../scripts/llama_pro.py \
     --model_name_or_path meta-llama/Llama-2-7b-hf \
     --output_dir ../../../models/llama2-7b-pro \
diff --git a/examples/extras/llama_pro/sft.sh b/examples/extras/llama_pro/sft.sh
index 712f2bcb..573078ff 100644
--- a/examples/extras/llama_pro/sft.sh
+++ b/examples/extras/llama_pro/sft.sh
@@ -10,6 +10,7 @@ CUDA_VISIBLE_DEVICES=0 python ../../../src/train_bash.py \
     --finetuning_type freeze \
     --name_module_trainable all \
     --num_layer_trainable 8 \
+    --use_llama_pro \
     --output_dir ../../../saves/LLaMA2-7B-Pro/lora/sft \
     --overwrite_cache \
     --overwrite_output_dir \
diff --git a/examples/full_multi_gpu/multi_node.sh b/examples/full_multi_gpu/multi_node.sh
index 392d717b..56509225 100644
--- a/examples/full_multi_gpu/multi_node.sh
+++ b/examples/full_multi_gpu/multi_node.sh
@@ -33,5 +33,6 @@ python -m torch.distributed.run \
     --num_train_epochs 3.0 \
     --max_samples 3000 \
     --val_size 0.1 \
+    --ddp_timeout 1800000 \
     --plot_loss \
     --fp16
diff --git a/examples/full_multi_gpu/single_node.sh b/examples/full_multi_gpu/single_node.sh
index c748420f..0502e7f1 100644
--- a/examples/full_multi_gpu/single_node.sh
+++ b/examples/full_multi_gpu/single_node.sh
@@ -27,5 +27,6 @@ deepspeed --num_gpus 4 ../../src/train_bash.py \
     --num_train_epochs 3.0 \
     --max_samples 3000 \
     --val_size 0.1 \
+    --ddp_timeout 1800000 \
     --plot_loss \
     --fp16
diff --git a/examples/lora_multi_gpu/multi_node.sh b/examples/lora_multi_gpu/multi_node.sh
index 1ac61590..f538c16a 100644
--- a/examples/lora_multi_gpu/multi_node.sh
+++ b/examples/lora_multi_gpu/multi_node.sh
@@ -30,5 +30,6 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch \
     --num_train_epochs 3.0 \
     --max_samples 3000 \
     --val_size 0.1 \
+    --ddp_timeout 1800000 \
     --plot_loss \
     --fp16
diff --git a/examples/lora_multi_gpu/single_node.sh b/examples/lora_multi_gpu/single_node.sh
index 104535d0..fef7864b 100644
--- a/examples/lora_multi_gpu/single_node.sh
+++ b/examples/lora_multi_gpu/single_node.sh
@@ -30,5 +30,6 @@ CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 accelerate launch \
     --num_train_epochs 3.0 \
     --max_samples 3000 \
     --val_size 0.1 \
+    --ddp_timeout 1800000 \
     --plot_loss \
     --fp16