From fcbfa70c19dc75d23ef29c681548405723465239 Mon Sep 17 00:00:00 2001
From: hiyouga <467089858@qq.com>
Date: Sat, 15 Jun 2024 04:57:19 +0800
Subject: [PATCH] disable DP

Former-commit-id: d519b4d76d39b21a21b1d2f6f7ce6b3af9525d03
---
 src/llamafactory/hparams/parser.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py
index ec5dd62c..3476a726 100644
--- a/src/llamafactory/hparams/parser.py
+++ b/src/llamafactory/hparams/parser.py
@@ -8,6 +8,7 @@ import transformers
 from transformers import HfArgumentParser, Seq2SeqTrainingArguments
 from transformers.integrations import is_deepspeed_zero3_enabled
 from transformers.trainer_utils import get_last_checkpoint
+from transformers.training_args import ParallelMode
 from transformers.utils import is_torch_bf16_gpu_available
 from transformers.utils.versions import require_version
 
@@ -162,6 +163,9 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
     ):
         raise ValueError("PPO only accepts wandb or tensorboard logger.")
 
+    if training_args.parallel_mode == ParallelMode.NOT_DISTRIBUTED:
+        raise ValueError("Please launch distributed training with `llamafactory-cli` or `torchrun`.")
+
     if training_args.max_steps == -1 and data_args.streaming:
         raise ValueError("Please specify `max_steps` in streaming mode.")
 
@@ -181,14 +185,14 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
     if (
         finetuning_args.use_galore
         and finetuning_args.galore_layerwise
-        and training_args.parallel_mode.value == "distributed"
+        and training_args.parallel_mode == ParallelMode.DISTRIBUTED
     ):
         raise ValueError("Distributed training does not support layer-wise GaLore.")
 
     if (
         finetuning_args.use_badam
         and finetuning_args.badam_mode == "layer"
-        and training_args.parallel_mode.value == "distributed"
+        and training_args.parallel_mode == ParallelMode.DISTRIBUTED
     ):
         raise ValueError("Layer-wise BAdam does not yet support distributed training, use ratio-wise BAdam.")
 
@@ -230,7 +234,7 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
 
     # Post-process training arguments
     if (
-        training_args.parallel_mode.value == "distributed"
+        training_args.parallel_mode == ParallelMode.DISTRIBUTED
         and training_args.ddp_find_unused_parameters is None
         and finetuning_args.finetuning_type == "lora"
     ):
@@ -290,7 +294,7 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
             training_args.local_rank,
             training_args.device,
             training_args.n_gpu,
-            training_args.parallel_mode.value == "distributed",
+            training_args.parallel_mode == ParallelMode.DISTRIBUTED,
             str(model_args.compute_dtype),
         )
     )
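
Note (added for illustration, not part of the patch): the hunks above replace string comparisons on `training_args.parallel_mode.value` with direct comparisons against the `ParallelMode` enum from `transformers.training_args`, and the new guard rejects `ParallelMode.NOT_DISTRIBUTED`, i.e. the DataParallel case where several GPUs are visible but no distributed launcher was used. Below is a minimal sketch of that behavior, assuming only that `transformers` is installed; `check_launch` is a hypothetical helper mirroring the added guard, not a function in this repository.

    # Minimal sketch (assumes transformers is installed); `check_launch` is a
    # hypothetical helper mirroring the guard added in get_train_args above.
    from transformers.training_args import ParallelMode

    # The enum's value is the old string, so `parallel_mode == ParallelMode.DISTRIBUTED`
    # is equivalent to the previous `parallel_mode.value == "distributed"` test.
    assert ParallelMode.DISTRIBUTED.value == "distributed"

    def check_launch(parallel_mode: ParallelMode) -> None:
        # NOT_DISTRIBUTED means several GPUs are visible but no torch.distributed
        # launcher is in use (plain DataParallel), which is what this patch disables.
        if parallel_mode == ParallelMode.NOT_DISTRIBUTED:
            raise ValueError("Please launch distributed training with `llamafactory-cli` or `torchrun`.")

    check_launch(ParallelMode.DISTRIBUTED)        # passes: launched via torchrun
    # check_launch(ParallelMode.NOT_DISTRIBUTED)  # would raise: multi-GPU DataParallel run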