From 964569751fc9d061ebdceaf25b2f8b5944325ebc Mon Sep 17 00:00:00 2001
From: mrhaoxx <mr.haoxx@gmail.com>
Date: Thu, 18 Dec 2025 21:26:04 +0800
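Subject: [PATCH] [kt] refactor ktransformers integration (#9632)

Remove the standalone `llamafactory.train.ksft` package and fold the
KTransformers (KT) path into the regular SFT workflow.

* sft/workflow.py: when `model_args.use_kt` is set, raise
  NotImplementedError for `predict_with_generate` and `compute_accuracy`,
  set the KTransformers global config "mod" to "sft", and build a
  `KTrainer` instead of `CustomSeq2SeqTrainer`.
* tuner.py: the "sft" stage now always calls `run_sft`; the `use_kt`
  dispatch to `ksft.workflow.run_sft` is removed.

Behaviour differences on the KT path compared with the removed workflow:
`model.config.use_cache = False` is applied whenever the KTrainer is
created (previously only under `do_train`), and the global config is
updated right before trainer construction rather than right after the
model is loaded.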

---
 src/llamafactory/train/ksft/__init__.py |  18 ----
 src/llamafactory/train/ksft/workflow.py | 113 ------------------------
 src/llamafactory/train/sft/workflow.py  |  47 +++++++---
 src/llamafactory/train/tuner.py         |   8 +-
 4 files changed, 37 insertions(+), 149 deletions(-)
 delete mode 100644 src/llamafactory/train/ksft/__init__.py
 delete mode 100644 src/llamafactory/train/ksft/workflow.py

diff --git a/src/llamafactory/train/ksft/__init__.py b/src/llamafactory/train/ksft/__init__.py
deleted file mode 100644
index 12c53f62d..000000000
--- a/src/llamafactory/train/ksft/__init__.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright 2025 the KVCache.AI team, Approaching AI, and the LlamaFactory team.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from .workflow import run_sft
-
-
-__all__ = ["run_sft"]
diff --git a/src/llamafactory/train/ksft/workflow.py b/src/llamafactory/train/ksft/workflow.py
deleted file mode 100644
index 5478a437b..000000000
--- a/src/llamafactory/train/ksft/workflow.py
+++ /dev/null
@@ -1,113 +0,0 @@
-# Copyright 2025 the KVCache.AI team, Approaching AI, and the LlamaFactory team.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import TYPE_CHECKING, Optional
-
-from ...data import SFTDataCollatorWith4DAttentionMask, get_dataset, get_template_and_fix_tokenizer
-from ...extras.constants import IGNORE_INDEX
-from ...extras.logging import get_logger
-from ...extras.misc import calculate_tps
-from ...extras.ploting import plot_loss
-from ...model import load_model, load_tokenizer
-from ..trainer_utils import create_modelcard_and_push
-
-
-if TYPE_CHECKING:
-    from transformers import Seq2SeqTrainingArguments, TrainerCallback
-
-    from ...hparams import DataArguments, FinetuningArguments, GeneratingArguments, ModelArguments
-
-
-logger = get_logger(__name__)
-
-
-def run_sft(
-    model_args: "ModelArguments",
-    data_args: "DataArguments",
-    training_args: "Seq2SeqTrainingArguments",
-    finetuning_args: "FinetuningArguments",
-    generating_args: "GeneratingArguments",
-    callbacks: Optional[list["TrainerCallback"]] = None,
-):
-    tokenizer_module = load_tokenizer(model_args)
-    tokenizer = tokenizer_module["tokenizer"]
-    template = get_template_and_fix_tokenizer(tokenizer, data_args)
-    dataset_module = get_dataset(template, model_args, data_args, training_args, stage="sft", **tokenizer_module)
-    model = load_model(tokenizer, model_args, finetuning_args, training_args.do_train)
-
-    from ktransformers.util.globals import GLOBAL_CONFIG
-
-    GLOBAL_CONFIG._config["mod"] = "sft"
-
-    if getattr(model, "is_quantized", False) and not training_args.do_train:
-        setattr(model, "_hf_peft_config_loaded", True)  # hack here: make model compatible with prediction
-
-    data_collator = SFTDataCollatorWith4DAttentionMask(
-        template=template,
-        model=model if not training_args.predict_with_generate else None,
-        pad_to_multiple_of=8 if training_args.do_train else None,  # for shift short attention
-        label_pad_token_id=IGNORE_INDEX if data_args.ignore_pad_token_for_loss else tokenizer.pad_token_id,
-        block_diag_attn=model_args.block_diag_attn,
-        attn_implementation=getattr(model.config, "_attn_implementation", None),
-        compute_dtype=model_args.compute_dtype,
-        **tokenizer_module,
-    )
-
-    # Metric utils
-    metric_module = {}
-    if training_args.predict_with_generate:
-        raise NotImplementedError("`predict_with_generate` is not supported in KTransformers SFT yet.")
-    elif finetuning_args.compute_accuracy:
-        raise NotImplementedError("`compute_accuracy` is not supported in KTransformers SFT yet.")
-
-    # Initialize our Trainer
-    from ktransformers.sft.lora import KTrainer
-
-    trainer = KTrainer(
-        model=model,
-        args=training_args,
-        tokenizer=tokenizer_module,
-        data_collator=data_collator,
-        callbacks=callbacks,
-        **dataset_module,
-        **metric_module,
-    )
-    trainer.model_accepts_loss_kwargs = False
-
-    # Training
-    if training_args.do_train:
-        model.config.use_cache = False
-        train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)
-        trainer.save_model()
-        if finetuning_args.include_effective_tokens_per_second:
-            train_result.metrics["effective_tokens_per_sec"] = calculate_tps(
-                dataset_module["train_dataset"], train_result.metrics, stage="sft"
-            )
-
-        trainer.log_metrics("train", train_result.metrics)
-        trainer.save_metrics("train", train_result.metrics)
-        trainer.save_state()
-        if trainer.is_world_process_zero() and finetuning_args.plot_loss:
-            keys = ["loss"]
-            if isinstance(dataset_module.get("eval_dataset"), dict):
-                keys += sum(
-                    [[f"eval_{key}_loss", f"eval_{key}_accuracy"] for key in dataset_module["eval_dataset"].keys()], []
-                )
-            else:
-                keys += ["eval_loss", "eval_accuracy"]
-
-            plot_loss(training_args.output_dir, keys=keys)
-
-    # Create model card
-    create_modelcard_and_push(trainer, model_args, data_args, training_args, finetuning_args)
diff --git a/src/llamafactory/train/sft/workflow.py b/src/llamafactory/train/sft/workflow.py
index ebc1301c0..b289f963d 100644
--- a/src/llamafactory/train/sft/workflow.py
+++ b/src/llamafactory/train/sft/workflow.py
@@ -68,6 +68,12 @@ def run_sft(
 
     # Metric utils
     metric_module = {}
+    if model_args.use_kt:
+        if training_args.predict_with_generate:
+            raise NotImplementedError("`predict_with_generate` is not supported in KTransformers SFT yet.")
+        elif finetuning_args.compute_accuracy:
+            raise NotImplementedError("`compute_accuracy` is not supported in KTransformers SFT yet.")
+    
     if training_args.predict_with_generate:
         metric_module["compute_metrics"] = ComputeSimilarity(tokenizer=tokenizer)
     elif finetuning_args.compute_accuracy:
@@ -92,17 +98,36 @@ def run_sft(
     gen_kwargs["pad_token_id"] = tokenizer.pad_token_id
 
     # Initialize our Trainer
-    trainer = CustomSeq2SeqTrainer(
-        model=model,
-        args=training_args,
-        finetuning_args=finetuning_args,
-        data_collator=data_collator,
-        callbacks=callbacks,
-        gen_kwargs=gen_kwargs,
-        **dataset_module,
-        **tokenizer_module,
-        **metric_module,
-    )
+    if model_args.use_kt:
+        from ktransformers.util.globals import GLOBAL_CONFIG
+        from ktransformers.sft.lora import KTrainer
+
+        GLOBAL_CONFIG._config["mod"] = "sft"
+
+        trainer = KTrainer(
+            model=model,
+            args=training_args,
+            tokenizer=tokenizer_module,
+            data_collator=data_collator,
+            callbacks=callbacks,
+            **dataset_module,
+            **metric_module,
+        )
+        trainer.model_accepts_loss_kwargs = False
+        model.config.use_cache = False
+
+    else:
+        trainer = CustomSeq2SeqTrainer(
+            model=model,
+            args=training_args,
+            finetuning_args=finetuning_args,
+            data_collator=data_collator,
+            callbacks=callbacks,
+            gen_kwargs=gen_kwargs,
+            **dataset_module,
+            **tokenizer_module,
+            **metric_module,
+        )
 
     # Training
     if training_args.do_train:
diff --git a/src/llamafactory/train/tuner.py b/src/llamafactory/train/tuner.py
index b646890ec..90e284110 100644
--- a/src/llamafactory/train/tuner.py
+++ b/src/llamafactory/train/tuner.py
@@ -85,13 +85,7 @@ def _training_function(config: dict[str, Any]) -> None:
     elif finetuning_args.stage == "pt":
         run_pt(model_args, data_args, training_args, finetuning_args, callbacks)
     elif finetuning_args.stage == "sft":
-        if model_args.use_kt:
-            from .ksft.workflow import run_sft as run_sft_kt
-
-            run_sft_kt(model_args, data_args, training_args, finetuning_args, generating_args, callbacks)
-        else:
-            run_sft(model_args, data_args, training_args, finetuning_args, generating_args, callbacks)
-
+        run_sft(model_args, data_args, training_args, finetuning_args, generating_args, callbacks)
     elif finetuning_args.stage == "rm":
         run_rm(model_args, data_args, training_args, finetuning_args, callbacks)
     elif finetuning_args.stage == "ppo":