diff --git a/examples/extras/llama_pro/expand.sh b/examples/extras/llama_pro/expand.sh
index 9d99edea..b260902c 100644
--- a/examples/extras/llama_pro/expand.sh
+++ b/examples/extras/llama_pro/expand.sh
@@ -1,7 +1,5 @@
 #!/bin/bash
 
-pip install -e ../../../.
-
 python ../../../scripts/llama_pro.py \
     --model_name_or_path meta-llama/Llama-2-7b-hf \
     --output_dir ../../../models/llama2-7b-pro \
diff --git a/examples/extras/llama_pro/sft.sh b/examples/extras/llama_pro/sft.sh
index 712f2bcb..573078ff 100644
--- a/examples/extras/llama_pro/sft.sh
+++ b/examples/extras/llama_pro/sft.sh
@@ -10,6 +10,7 @@ CUDA_VISIBLE_DEVICES=0 python ../../../src/train_bash.py \
     --finetuning_type freeze \
     --name_module_trainable all \
     --num_layer_trainable 8 \
+    --use_llama_pro \
     --output_dir ../../../saves/LLaMA2-7B-Pro/lora/sft \
     --overwrite_cache \
     --overwrite_output_dir \
diff --git a/examples/full_multi_gpu/multi_node.sh b/examples/full_multi_gpu/multi_node.sh
index 392d717b..56509225 100644
--- a/examples/full_multi_gpu/multi_node.sh
+++ b/examples/full_multi_gpu/multi_node.sh
@@ -33,5 +33,6 @@ python -m torch.distributed.run \
     --num_train_epochs 3.0 \
     --max_samples 3000 \
     --val_size 0.1 \
+    --ddp_timeout 1800000 \
     --plot_loss \
     --fp16
diff --git a/examples/full_multi_gpu/single_node.sh b/examples/full_multi_gpu/single_node.sh
index c748420f..0502e7f1 100644
--- a/examples/full_multi_gpu/single_node.sh
+++ b/examples/full_multi_gpu/single_node.sh
@@ -27,5 +27,6 @@ deepspeed --num_gpus 4 ../../src/train_bash.py \
     --num_train_epochs 3.0 \
     --max_samples 3000 \
     --val_size 0.1 \
+    --ddp_timeout 1800000 \
     --plot_loss \
     --fp16
diff --git a/examples/lora_multi_gpu/multi_node.sh b/examples/lora_multi_gpu/multi_node.sh
index 1ac61590..f538c16a 100644
--- a/examples/lora_multi_gpu/multi_node.sh
+++ b/examples/lora_multi_gpu/multi_node.sh
@@ -30,5 +30,6 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch \
     --num_train_epochs 3.0 \
     --max_samples 3000 \
     --val_size 0.1 \
+    --ddp_timeout 1800000 \
     --plot_loss \
     --fp16
diff --git a/examples/lora_multi_gpu/single_node.sh b/examples/lora_multi_gpu/single_node.sh
index 104535d0..fef7864b 100644
--- a/examples/lora_multi_gpu/single_node.sh
+++ b/examples/lora_multi_gpu/single_node.sh
@@ -30,5 +30,6 @@ CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 accelerate launch \
     --num_train_epochs 3.0 \
     --max_samples 3000 \
     --val_size 0.1 \
+    --ddp_timeout 1800000 \
     --plot_loss \
     --fp16