From bf2b8df5407af7c3373239555600736fceac4848 Mon Sep 17 00:00:00 2001
From: Ting
Date: Tue, 19 Nov 2024 19:10:07 +0800
Subject: [PATCH] update

Former-commit-id: ef6e14550dd76810285cee9c268590d1d9423e54
---
 src/llamafactory/train/dpo/workflow.py |  9 +++++++--
 src/llamafactory/train/sft/workflow.py | 14 +++++++++-----
 2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/src/llamafactory/train/dpo/workflow.py b/src/llamafactory/train/dpo/workflow.py
index a94a7eff..04f534ce 100644
--- a/src/llamafactory/train/dpo/workflow.py
+++ b/src/llamafactory/train/dpo/workflow.py
@@ -16,6 +16,7 @@
 # limitations under the License.
 
 from typing import TYPE_CHECKING, List, Optional
+
 import torch.distributed as dist
 
 from ...data import PairwiseDataCollatorWithPadding, get_dataset, get_template_and_fix_tokenizer
@@ -85,9 +86,13 @@ def run_dpo(
     # Training
     if training_args.do_train:
         train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)
-        train_result.metrics['effective_tokens_per_sec'] = effi_token_num * train_result.metrics['epoch'] / train_result.metrics['train_runtime']
+        train_result.metrics["effective_tokens_per_sec"] = (
+            effi_token_num * train_result.metrics["epoch"] / train_result.metrics["train_runtime"]
+        )
         if dist.is_initialized():
-            train_result.metrics['effective_tokens_per_sec'] = train_result.metrics['effective_tokens_per_sec'] / dist.get_world_size()
+            train_result.metrics["effective_tokens_per_sec"] = (
+                train_result.metrics["effective_tokens_per_sec"] / dist.get_world_size()
+            )
         trainer.save_model()
         trainer.log_metrics("train", train_result.metrics)
 
diff --git a/src/llamafactory/train/sft/workflow.py b/src/llamafactory/train/sft/workflow.py
index c288b69f..197a4866 100644
--- a/src/llamafactory/train/sft/workflow.py
+++ b/src/llamafactory/train/sft/workflow.py
@@ -16,6 +16,7 @@
 # limitations under the License.
 
 from typing import TYPE_CHECKING, List, Optional
+
 import torch.distributed as dist
 
 from ...data import SFTDataCollatorWith4DAttentionMask, get_dataset, get_template_and_fix_tokenizer
@@ -66,9 +67,9 @@ def run_sft(
     training_args.generation_num_beams = data_args.eval_num_beams or training_args.generation_num_beams
     training_args.remove_unused_columns = False  # important for multimodal dataset
 
-    effi_token_num = 0.0
+    effective_token_num = 0.0
     for data in dataset_module["train_dataset"]:
-        effi_token_num += len(data["input_ids"])
+        effective_token_num += len(data["input_ids"])
 
     # Metric utils
     metric_module = {}
@@ -99,9 +100,13 @@ def run_sft(
     # Training
     if training_args.do_train:
        train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)
-        train_result.metrics['effective_tokens_per_sec'] = effi_token_num * train_result.metrics['epoch'] / train_result.metrics['train_runtime']
+        train_result.metrics["effective_tokens_per_sec"] = (
+            effective_token_num * train_result.metrics["epoch"] / train_result.metrics["train_runtime"]
+        )
         if dist.is_initialized():
-            train_result.metrics['effective_tokens_per_sec'] = train_result.metrics['effective_tokens_per_sec'] / dist.get_world_size()
+            train_result.metrics["effective_tokens_per_sec"] = (
+                train_result.metrics["effective_tokens_per_sec"] / dist.get_world_size()
+            )
         trainer.save_model()
         trainer.log_metrics("train", train_result.metrics)
 
@@ -132,4 +137,3 @@
 
     # Create model card
     create_modelcard_and_push(trainer, model_args, data_args, training_args, finetuning_args)
-
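
Note: the "effective tokens per second" metric introduced above can be read in isolation as follows. This is a minimal sketch, not code from the patch; it assumes a tokenized dataset whose examples carry an "input_ids" field, and the function name and parameters are illustrative only:

    import torch.distributed as dist

    def effective_tokens_per_sec(train_dataset, epochs: float, train_runtime: float) -> float:
        # Count every token actually fed to the model across the dataset.
        effective_token_num = sum(len(example["input_ids"]) for example in train_dataset)
        # Total tokens processed over all epochs, divided by wall-clock seconds.
        tokens_per_sec = effective_token_num * epochs / train_runtime
        # Mirror the patch: under distributed training, divide by the world
        # size (presumably because every rank reports the same wall-clock
        # runtime, making this a per-device throughput figure).
        if dist.is_initialized():
            tokens_per_sec /= dist.get_world_size()
        return tokens_per_sec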