diff --git a/examples/train_full/llama3_full_sft_ds3.yaml b/examples/train_full/llama3_full_sft_ds3.yaml index 40b62f24..40afd2ee 100644 --- a/examples/train_full/llama3_full_sft_ds3.yaml +++ b/examples/train_full/llama3_full_sft_ds3.yaml @@ -5,9 +5,6 @@ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct stage: sft do_train: true finetuning_type: full - -### ddp -ddp_timeout: 180000000 deepspeed: examples/deepspeed/ds_z3_config.json ### dataset @@ -33,6 +30,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1