From 8ed085e403da6f2f77ab317f0499db52ea2ccb60 Mon Sep 17 00:00:00 2001 From: Yaowei Zheng Date: Mon, 23 Jun 2025 17:56:48 +0800 Subject: [PATCH] [model] add kimi vl 2506 (#8432) --- README_zh.md | 1 + src/llamafactory/data/template.py | 7 ++- src/llamafactory/extras/constants.py | 53 ++++++++++--------- src/llamafactory/model/model_utils/unsloth.py | 5 +- 4 files changed, 36 insertions(+), 30 deletions(-) diff --git a/README_zh.md b/README_zh.md index 05f75244..530218bf 100644 --- a/README_zh.md +++ b/README_zh.md @@ -264,6 +264,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc | [DeepSeek 2.5/3](https://huggingface.co/deepseek-ai) | 236B/671B | deepseek3 | | [DeepSeek R1 (Distill)](https://huggingface.co/deepseek-ai) | 1.5B/7B/8B/14B/32B/70B/671B | deepseekr1 | | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | +| [Falcon-H1](https://huggingface.co/tiiuae) | 0.5B/1.5B/3B/7B/34B | falcon_h1 | | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma | | [Gemma 3](https://huggingface.co/google) | 1B/4B/12B/27B | gemma3/gemma (1B) | | [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/THUDM) | 9B/32B | glm4/glmz1 | diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index 4a769662..b4eda7f5 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -916,14 +916,13 @@ register_template( ) +# copied from chatml template register_template( name="falcon_h1", - format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n"]), + format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]), format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]), format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]), - format_function=FunctionFormatter(slots=["{{content}}<|im_end|>\n"], tool_format="default"), - 
format_observation=StringFormatter(slots=["<|im_start|>tool\n{{content}}<|im_end|>\n"]), - format_tools=ToolFormatter(tool_format="default"), + format_observation=StringFormatter(slots=["<|im_start|>tool\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]), format_prefix=EmptyFormatter(slots=[{"bos_token"}]), stop_words=["<|im_end|>", "<|end_of_text|>"], ) diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 85c886c0..08b7e13c 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -635,55 +635,54 @@ register_model_group( register_model_group( models={ - "Falcon-H1-0.5B-Instruct": { - DownloadSource.DEFAULT: "tiiuae/Falcon-H1-0.5B-Instruct", - DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-0.5B-Instruct", - }, "Falcon-H1-0.5B-Base": { DownloadSource.DEFAULT: "tiiuae/Falcon-H1-0.5B-Base", DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-0.5B-Base", }, - "Falcon-H1-1.5B-Instruct": { - DownloadSource.DEFAULT: "tiiuae/Falcon-H1-1.5B-Instruct", - DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-1.5B-Instruct", - }, "Falcon-H1-1.5B-Base": { DownloadSource.DEFAULT: "tiiuae/Falcon-H1-1.5B-Base", DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-1.5B-Base", }, - "Falcon-H1-1.5B-Deep-Instruct": { - DownloadSource.DEFAULT: "tiiuae/Falcon-H1-1.5B-Deep-Instruct", - DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-1.5B-Deep-Instruct", - }, "Falcon-H1-1.5B-Deep-Base": { DownloadSource.DEFAULT: "tiiuae/Falcon-H1-1.5B-Deep-Base", DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-1.5B-Deep-Base", }, - "Falcon-H1-3B-Instruct": { - DownloadSource.DEFAULT: "tiiuae/Falcon-H1-3B-Instruct", - DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-3B-Instruct", - }, "Falcon-H1-3B-Base": { DownloadSource.DEFAULT: "tiiuae/Falcon-H1-3B-Base", DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-3B-Base", }, - "Falcon-H1-7B-Instruct": { - DownloadSource.DEFAULT: "tiiuae/Falcon-H1-7B-Instruct", - DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-7B-Instruct", - 
}, "Falcon-H1-7B-Base": { DownloadSource.DEFAULT: "tiiuae/Falcon-H1-7B-Base", DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-7B-Base", }, - "Falcon-H1-34B-Instruct": { - DownloadSource.DEFAULT: "tiiuae/Falcon-H1-34B-Instruct", - DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-34B-Instruct", - }, "Falcon-H1-34B-Base": { DownloadSource.DEFAULT: "tiiuae/Falcon-H1-34B-Base", DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-34B-Base", }, - + "Falcon-H1-0.5B-Instruct": { + DownloadSource.DEFAULT: "tiiuae/Falcon-H1-0.5B-Instruct", + DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-0.5B-Instruct", + }, + "Falcon-H1-1.5B-Instruct": { + DownloadSource.DEFAULT: "tiiuae/Falcon-H1-1.5B-Instruct", + DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-1.5B-Instruct", + }, + "Falcon-H1-1.5B-Deep-Instruct": { + DownloadSource.DEFAULT: "tiiuae/Falcon-H1-1.5B-Deep-Instruct", + DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-1.5B-Deep-Instruct", + }, + "Falcon-H1-3B-Instruct": { + DownloadSource.DEFAULT: "tiiuae/Falcon-H1-3B-Instruct", + DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-3B-Instruct", + }, + "Falcon-H1-7B-Instruct": { + DownloadSource.DEFAULT: "tiiuae/Falcon-H1-7B-Instruct", + DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-7B-Instruct", + }, + "Falcon-H1-34B-Instruct": { + DownloadSource.DEFAULT: "tiiuae/Falcon-H1-34B-Instruct", + DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-34B-Instruct", + }, }, template="falcon_h1", ) @@ -1154,6 +1153,10 @@ register_model_group( DownloadSource.DEFAULT: "moonshotai/Kimi-VL-A3B-Thinking", DownloadSource.MODELSCOPE: "moonshotai/Kimi-VL-A3B-Thinking", }, + "Kimi-VL-A3B-Thinking-2506": { + DownloadSource.DEFAULT: "moonshotai/Kimi-VL-A3B-Thinking-2506", + DownloadSource.MODELSCOPE: "moonshotai/Kimi-VL-A3B-Thinking-2506", + }, }, template="kimi_vl", multimodal=True, diff --git a/src/llamafactory/model/model_utils/unsloth.py b/src/llamafactory/model/model_utils/unsloth.py index 7792857a..91e18dac 100644 --- a/src/llamafactory/model/model_utils/unsloth.py +++ 
b/src/llamafactory/model/model_utils/unsloth.py @@ -80,7 +80,10 @@ def get_unsloth_peft_model( def load_unsloth_peft_model( - config: "PretrainedConfig", model_args: "ModelArguments", finetuning_args: "FinetuningArguments", is_trainable: bool + config: "PretrainedConfig", + model_args: "ModelArguments", + finetuning_args: "FinetuningArguments", + is_trainable: bool, ) -> "PreTrainedModel": r"""Load peft model with unsloth. Used in both training and inference.""" from unsloth import FastLanguageModel # type: ignore