[model] add kimi vl 2506 (#8432)

2025-12-14 19:06:26 +08:00 · 2025-06-23 17:56:48 +08:00
parent 1221533542
commit 8ed085e403
4 changed files with 36 additions and 30 deletions
--- a/README_zh.md
+++ b/README_zh.md
@@ -264,6 +264,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
 | [DeepSeek 2.5/3](https://huggingface.co/deepseek-ai)              | 236B/671B                        | deepseek3           |
 | [DeepSeek R1 (Distill)](https://huggingface.co/deepseek-ai)       | 1.5B/7B/8B/14B/32B/70B/671B      | deepseekr1          |
 | [Falcon](https://huggingface.co/tiiuae)                           | 7B/11B/40B/180B                  | falcon              |
+| [Falcon-H1](https://huggingface.co/tiiuae)                        | 0.5B/1.5B/3B/7B/34B              | falcon_h1           |
 | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google)          | 2B/7B/9B/27B                     | gemma               |
 | [Gemma 3](https://huggingface.co/google)                          | 1B/4B/12B/27B                    | gemma3/gemma (1B)   |
 | [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/THUDM)           | 9B/32B                           | glm4/glmz1          |
--- a/src/llamafactory/data/template.py
+++ b/src/llamafactory/data/template.py
@@ -916,14 +916,13 @@ register_template(
 )


+# copied from chatml template
 register_template(
    name="falcon_h1",
-    format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n"]),
+    format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
    format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
    format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
-    format_function=FunctionFormatter(slots=["{{content}}<|im_end|>\n"], tool_format="default"),
-    format_observation=StringFormatter(slots=["<|im_start|>tool\n{{content}}<|im_end|>\n"]),
-    format_tools=ToolFormatter(tool_format="default"),
+    format_observation=StringFormatter(slots=["<|im_start|>tool\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
    format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
    stop_words=["<|im_end|>", "<|end_of_text|>"],
 )
--- a/src/llamafactory/extras/constants.py
+++ b/src/llamafactory/extras/constants.py
@@ -635,55 +635,54 @@ register_model_group(

 register_model_group(
    models={
-        "Falcon-H1-0.5B-Instruct": {
-            DownloadSource.DEFAULT: "tiiuae/Falcon-H1-0.5B-Instruct",
-            DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-0.5B-Instruct",
-        },
        "Falcon-H1-0.5B-Base": {
            DownloadSource.DEFAULT: "tiiuae/Falcon-H1-0.5B-Base",
            DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-0.5B-Base",
        },
-        "Falcon-H1-1.5B-Instruct": {
-            DownloadSource.DEFAULT: "tiiuae/Falcon-H1-1.5B-Instruct",
-            DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-1.5B-Instruct",
-        },
        "Falcon-H1-1.5B-Base": {
            DownloadSource.DEFAULT: "tiiuae/Falcon-H1-1.5B-Base",
            DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-1.5B-Base",
        },
-        "Falcon-H1-1.5B-Deep-Instruct": {
-            DownloadSource.DEFAULT: "tiiuae/Falcon-H1-1.5B-Deep-Instruct",
-            DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-1.5B-Deep-Instruct",
-        },
        "Falcon-H1-1.5B-Deep-Base": {
            DownloadSource.DEFAULT: "tiiuae/Falcon-H1-1.5B-Deep-Base",
            DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-1.5B-Deep-Base",
        },
-        "Falcon-H1-3B-Instruct": {
-            DownloadSource.DEFAULT: "tiiuae/Falcon-H1-3B-Instruct",
-            DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-3B-Instruct",
-        },
        "Falcon-H1-3B-Base": {
            DownloadSource.DEFAULT: "tiiuae/Falcon-H1-3B-Base",
            DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-3B-Base",
        },
-        "Falcon-H1-7B-Instruct": {
-            DownloadSource.DEFAULT: "tiiuae/Falcon-H1-7B-Instruct",
-            DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-7B-Instruct",
-        },
        "Falcon-H1-7B-Base": {
            DownloadSource.DEFAULT: "tiiuae/Falcon-H1-7B-Base",
            DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-7B-Base",
        },
-        "Falcon-H1-34B-Instruct": {
-            DownloadSource.DEFAULT: "tiiuae/Falcon-H1-34B-Instruct",
-            DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-34B-Instruct",
-        },
        "Falcon-H1-34B-Base": {
            DownloadSource.DEFAULT: "tiiuae/Falcon-H1-34B-Base",
            DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-34B-Base",
        },
-        
+        "Falcon-H1-0.5B-Instruct": {
+            DownloadSource.DEFAULT: "tiiuae/Falcon-H1-0.5B-Instruct",
+            DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-0.5B-Instruct",
+        },
+        "Falcon-H1-1.5B-Instruct": {
+            DownloadSource.DEFAULT: "tiiuae/Falcon-H1-1.5B-Instruct",
+            DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-1.5B-Instruct",
+        },
+        "Falcon-H1-1.5B-Deep-Instruct": {
+            DownloadSource.DEFAULT: "tiiuae/Falcon-H1-1.5B-Deep-Instruct",
+            DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-1.5B-Deep-Instruct",
+        },
+        "Falcon-H1-3B-Instruct": {
+            DownloadSource.DEFAULT: "tiiuae/Falcon-H1-3B-Instruct",
+            DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-3B-Instruct",
+        },
+        "Falcon-H1-7B-Instruct": {
+            DownloadSource.DEFAULT: "tiiuae/Falcon-H1-7B-Instruct",
+            DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-7B-Instruct",
+        },
+        "Falcon-H1-34B-Instruct": {
+            DownloadSource.DEFAULT: "tiiuae/Falcon-H1-34B-Instruct",
+            DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-34B-Instruct",
+        },
    },
    template="falcon_h1",
 )
@@ -1154,6 +1153,10 @@ register_model_group(
            DownloadSource.DEFAULT: "moonshotai/Kimi-VL-A3B-Thinking",
            DownloadSource.MODELSCOPE: "moonshotai/Kimi-VL-A3B-Thinking",
        },
+        "Kimi-VL-A3B-Thinking-2506": {
+            DownloadSource.DEFAULT: "moonshotai/Kimi-VL-A3B-Thinking-2506",
+            DownloadSource.MODELSCOPE: "moonshotai/Kimi-VL-A3B-Thinking-2506",
+        },
    },
    template="kimi_vl",
    multimodal=True,
--- a/src/llamafactory/model/model_utils/unsloth.py
+++ b/src/llamafactory/model/model_utils/unsloth.py
@@ -80,7 +80,10 @@ def get_unsloth_peft_model(


 def load_unsloth_peft_model(
-    config: "PretrainedConfig", model_args: "ModelArguments", finetuning_args: "FinetuningArguments", is_trainable: bool
+    config: "PretrainedConfig",
+    model_args: "ModelArguments",
+    finetuning_args: "FinetuningArguments",
+    is_trainable: bool,
 ) -> "PreTrainedModel":
    r"""Load peft model with unsloth. Used in both training and inference."""
    from unsloth import FastLanguageModel  # type: ignore