From bf2b8df5407af7c3373239555600736fceac4848 Mon Sep 17 00:00:00 2001
From: Ting
Date: Tue, 19 Nov 2024 19:10:07 +0800
Subject: [PATCH] update

Former-commit-id: ef6e14550dd76810285cee9c268590d1d9423e54
---
 src/llamafactory/train/dpo/workflow.py |  9 +++++++--
 src/llamafactory/train/sft/workflow.py | 14 +++++++++-----
 2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/src/llamafactory/train/dpo/workflow.py b/src/llamafactory/train/dpo/workflow.py
index a94a7eff..04f534ce 100644
--- a/src/llamafactory/train/dpo/workflow.py
+++ b/src/llamafactory/train/dpo/workflow.py
@@ -16,6 +16,7 @@
 # limitations under the License.
 
 from typing import TYPE_CHECKING, List, Optional
+
 import torch.distributed as dist
 
 from ...data import PairwiseDataCollatorWithPadding, get_dataset, get_template_and_fix_tokenizer
@@ -85,9 +86,13 @@ def run_dpo(
     # Training
     if training_args.do_train:
         train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)
-        train_result.metrics['effective_tokens_per_sec'] = effi_token_num * train_result.metrics['epoch'] / train_result.metrics['train_runtime']
+        train_result.metrics["effective_tokens_per_sec"] = (
+            effi_token_num * train_result.metrics["epoch"] / train_result.metrics["train_runtime"]
+        )
         if dist.is_initialized():
-            train_result.metrics['effective_tokens_per_sec'] = train_result.metrics['effective_tokens_per_sec'] / dist.get_world_size()
+            train_result.metrics["effective_tokens_per_sec"] = (
+                train_result.metrics["effective_tokens_per_sec"] / dist.get_world_size()
+            )
         trainer.save_model()
         trainer.log_metrics("train", train_result.metrics)
 
diff --git a/src/llamafactory/train/sft/workflow.py b/src/llamafactory/train/sft/workflow.py
index c288b69f..197a4866 100644
--- a/src/llamafactory/train/sft/workflow.py
+++ b/src/llamafactory/train/sft/workflow.py
@@ -16,6 +16,7 @@
 # limitations under the License.
 
 from typing import TYPE_CHECKING, List, Optional
+
 import torch.distributed as dist
 
 from ...data import SFTDataCollatorWith4DAttentionMask, get_dataset, get_template_and_fix_tokenizer
@@ -66,9 +67,9 @@ def run_sft(
     training_args.generation_num_beams = data_args.eval_num_beams or training_args.generation_num_beams
     training_args.remove_unused_columns = False  # important for multimodal dataset
 
-    effi_token_num = 0.0
+    effective_token_num = 0.0
     for data in dataset_module["train_dataset"]:
-        effi_token_num += len(data["input_ids"])
+        effective_token_num += len(data["input_ids"])
 
     # Metric utils
     metric_module = {}
@@ -99,9 +100,13 @@ def run_sft(
     # Training
     if training_args.do_train:
        train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)
-        train_result.metrics['effective_tokens_per_sec'] = effi_token_num * train_result.metrics['epoch'] / train_result.metrics['train_runtime']
+        train_result.metrics["effective_tokens_per_sec"] = (
+            effective_token_num * train_result.metrics["epoch"] / train_result.metrics["train_runtime"]
+        )
         if dist.is_initialized():
-            train_result.metrics['effective_tokens_per_sec'] = train_result.metrics['effective_tokens_per_sec'] / dist.get_world_size()
+            train_result.metrics["effective_tokens_per_sec"] = (
+                train_result.metrics["effective_tokens_per_sec"] / dist.get_world_size()
+            )
         trainer.save_model()
         trainer.log_metrics("train", train_result.metrics)
 
@@ -132,4 +137,3 @@
 
     # Create model card
     create_modelcard_and_push(trainer, model_args, data_args, training_args, finetuning_args)
-
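
Note: the "effective tokens per second" metric introduced above can be read in isolation as follows. This is a minimal sketch, not code from the patch; it assumes a tokenized dataset whose examples carry an "input_ids" field, and the function name and parameters are illustrative only:

    import torch.distributed as dist

    def effective_tokens_per_sec(train_dataset, epochs: float, train_runtime: float) -> float:
        # Count every token actually fed to the model across the dataset.
        effective_token_num = sum(len(example["input_ids"]) for example in train_dataset)
        # Total tokens processed over all epochs, divided by wall-clock seconds.
        tokens_per_sec = effective_token_num * epochs / train_runtime
        # Mirror the patch: under distributed training, divide by the world
        # size (presumably because every rank reports the same wall-clock
        # runtime, making this a per-device throughput figure).
        if dist.is_initialized():
            tokens_per_sec /= dist.get_world_size()
        return tokens_per_sec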