From 8ed085e403da6f2f77ab317f0499db52ea2ccb60 Mon Sep 17 00:00:00 2001 From: Yaowei Zheng Date: Mon, 23 Jun 2025 17:56:48 +0800 Subject: [PATCH] [model] add kimi vl 2506 (#8432) --- README_zh.md | 1 + src/llamafactory/data/template.py | 7 ++- src/llamafactory/extras/constants.py | 53 ++++++++++--------- src/llamafactory/model/model_utils/unsloth.py | 5 +- 4 files changed, 36 insertions(+), 30 deletions(-) diff --git a/README_zh.md b/README_zh.md index 05f75244..530218bf 100644 --- a/README_zh.md +++ b/README_zh.md @@ -264,6 +264,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc | [DeepSeek 2.5/3](https://huggingface.co/deepseek-ai) | 236B/671B | deepseek3 | | [DeepSeek R1 (Distill)](https://huggingface.co/deepseek-ai) | 1.5B/7B/8B/14B/32B/70B/671B | deepseekr1 | | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | +| [Falcon-H1](https://huggingface.co/tiiuae) | 0.5B/1.5B/3B/7B/34B | falcon_h1 | | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma | | [Gemma 3](https://huggingface.co/google) | 1B/4B/12B/27B | gemma3/gemma (1B) | | [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/THUDM) | 9B/32B | glm4/glmz1 | diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index 4a769662..b4eda7f5 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -916,14 +916,13 @@ register_template( ) +# copied from chatml template register_template( name="falcon_h1", - format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n"]), + format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]), format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]), format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]), - format_function=FunctionFormatter(slots=["{{content}}<|im_end|>\n"], tool_format="default"), - 
format_observation=StringFormatter(slots=["<|im_start|>tool\n{{content}}<|im_end|>\n"]), - format_tools=ToolFormatter(tool_format="default"), + format_observation=StringFormatter(slots=["<|im_start|>tool\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]), format_prefix=EmptyFormatter(slots=[{"bos_token"}]), stop_words=["<|im_end|>", "<|end_of_text|>"], ) diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 85c886c0..08b7e13c 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -635,55 +635,54 @@ register_model_group( register_model_group( models={ - "Falcon-H1-0.5B-Instruct": { - DownloadSource.DEFAULT: "tiiuae/Falcon-H1-0.5B-Instruct", - DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-0.5B-Instruct", - }, "Falcon-H1-0.5B-Base": { DownloadSource.DEFAULT: "tiiuae/Falcon-H1-0.5B-Base", DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-0.5B-Base", }, - "Falcon-H1-1.5B-Instruct": { - DownloadSource.DEFAULT: "tiiuae/Falcon-H1-1.5B-Instruct", - DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-1.5B-Instruct", - }, "Falcon-H1-1.5B-Base": { DownloadSource.DEFAULT: "tiiuae/Falcon-H1-1.5B-Base", DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-1.5B-Base", }, - "Falcon-H1-1.5B-Deep-Instruct": { - DownloadSource.DEFAULT: "tiiuae/Falcon-H1-1.5B-Deep-Instruct", - DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-1.5B-Deep-Instruct", - }, "Falcon-H1-1.5B-Deep-Base": { DownloadSource.DEFAULT: "tiiuae/Falcon-H1-1.5B-Deep-Base", DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-1.5B-Deep-Base", }, - "Falcon-H1-3B-Instruct": { - DownloadSource.DEFAULT: "tiiuae/Falcon-H1-3B-Instruct", - DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-3B-Instruct", - }, "Falcon-H1-3B-Base": { DownloadSource.DEFAULT: "tiiuae/Falcon-H1-3B-Base", DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-3B-Base", }, - "Falcon-H1-7B-Instruct": { - DownloadSource.DEFAULT: "tiiuae/Falcon-H1-7B-Instruct", - DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-7B-Instruct", - 
}, "Falcon-H1-7B-Base": { DownloadSource.DEFAULT: "tiiuae/Falcon-H1-7B-Base", DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-7B-Base", }, - "Falcon-H1-34B-Instruct": { - DownloadSource.DEFAULT: "tiiuae/Falcon-H1-34B-Instruct", - DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-34B-Instruct", - }, "Falcon-H1-34B-Base": { DownloadSource.DEFAULT: "tiiuae/Falcon-H1-34B-Base", DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-34B-Base", }, - + "Falcon-H1-0.5B-Instruct": { + DownloadSource.DEFAULT: "tiiuae/Falcon-H1-0.5B-Instruct", + DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-0.5B-Instruct", + }, + "Falcon-H1-1.5B-Instruct": { + DownloadSource.DEFAULT: "tiiuae/Falcon-H1-1.5B-Instruct", + DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-1.5B-Instruct", + }, + "Falcon-H1-1.5B-Deep-Instruct": { + DownloadSource.DEFAULT: "tiiuae/Falcon-H1-1.5B-Deep-Instruct", + DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-1.5B-Deep-Instruct", + }, + "Falcon-H1-3B-Instruct": { + DownloadSource.DEFAULT: "tiiuae/Falcon-H1-3B-Instruct", + DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-3B-Instruct", + }, + "Falcon-H1-7B-Instruct": { + DownloadSource.DEFAULT: "tiiuae/Falcon-H1-7B-Instruct", + DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-7B-Instruct", + }, + "Falcon-H1-34B-Instruct": { + DownloadSource.DEFAULT: "tiiuae/Falcon-H1-34B-Instruct", + DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-34B-Instruct", + }, }, template="falcon_h1", ) @@ -1154,6 +1153,10 @@ register_model_group( DownloadSource.DEFAULT: "moonshotai/Kimi-VL-A3B-Thinking", DownloadSource.MODELSCOPE: "moonshotai/Kimi-VL-A3B-Thinking", }, + "Kimi-VL-A3B-Thinking-2506": { + DownloadSource.DEFAULT: "moonshotai/Kimi-VL-A3B-Thinking-2506", + DownloadSource.MODELSCOPE: "moonshotai/Kimi-VL-A3B-Thinking-2506", + }, }, template="kimi_vl", multimodal=True, diff --git a/src/llamafactory/model/model_utils/unsloth.py b/src/llamafactory/model/model_utils/unsloth.py index 7792857a..91e18dac 100644 --- a/src/llamafactory/model/model_utils/unsloth.py +++ 
b/src/llamafactory/model/model_utils/unsloth.py @@ -80,7 +80,10 @@ def get_unsloth_peft_model( def load_unsloth_peft_model( - config: "PretrainedConfig", model_args: "ModelArguments", finetuning_args: "FinetuningArguments", is_trainable: bool + config: "PretrainedConfig", + model_args: "ModelArguments", + finetuning_args: "FinetuningArguments", + is_trainable: bool, ) -> "PreTrainedModel": r"""Load peft model with unsloth. Used in both training and inference.""" from unsloth import FastLanguageModel # type: ignore