mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-07-31 10:42:50 +08:00
[model] add kimi vl 2506 (#8432)
This commit is contained in:
parent
1221533542
commit
8ed085e403
@ -264,6 +264,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
|
||||
| [DeepSeek 2.5/3](https://huggingface.co/deepseek-ai) | 236B/671B | deepseek3 |
|
||||
| [DeepSeek R1 (Distill)](https://huggingface.co/deepseek-ai) | 1.5B/7B/8B/14B/32B/70B/671B | deepseekr1 |
|
||||
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
|
||||
| [Falcon-H1](https://huggingface.co/tiiuae) | 0.5B/1.5B/3B/7B/34B | falcon_h1 |
|
||||
| [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
|
||||
| [Gemma 3](https://huggingface.co/google) | 1B/4B/12B/27B | gemma3/gemma (1B) |
|
||||
| [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/THUDM) | 9B/32B | glm4/glmz1 |
|
||||
|
@ -916,14 +916,13 @@ register_template(
|
||||
)
|
||||
|
||||
|
||||
# copied from chatml template
|
||||
register_template(
|
||||
name="falcon_h1",
|
||||
format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n"]),
|
||||
format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
|
||||
format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
|
||||
format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
|
||||
format_function=FunctionFormatter(slots=["{{content}}<|im_end|>\n"], tool_format="default"),
|
||||
format_observation=StringFormatter(slots=["<|im_start|>tool\n{{content}}<|im_end|>\n"]),
|
||||
format_tools=ToolFormatter(tool_format="default"),
|
||||
format_observation=StringFormatter(slots=["<|im_start|>tool\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
|
||||
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
|
||||
stop_words=["<|im_end|>", "<|end_of_text|>"],
|
||||
)
|
||||
|
@ -635,55 +635,54 @@ register_model_group(
|
||||
|
||||
register_model_group(
|
||||
models={
|
||||
"Falcon-H1-0.5B-Instruct": {
|
||||
DownloadSource.DEFAULT: "tiiuae/Falcon-H1-0.5B-Instruct",
|
||||
DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-0.5B-Instruct",
|
||||
},
|
||||
"Falcon-H1-0.5B-Base": {
|
||||
DownloadSource.DEFAULT: "tiiuae/Falcon-H1-0.5B-Base",
|
||||
DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-0.5B-Base",
|
||||
},
|
||||
"Falcon-H1-1.5B-Instruct": {
|
||||
DownloadSource.DEFAULT: "tiiuae/Falcon-H1-1.5B-Instruct",
|
||||
DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-1.5B-Instruct",
|
||||
},
|
||||
"Falcon-H1-1.5B-Base": {
|
||||
DownloadSource.DEFAULT: "tiiuae/Falcon-H1-1.5B-Base",
|
||||
DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-1.5B-Base",
|
||||
},
|
||||
"Falcon-H1-1.5B-Deep-Instruct": {
|
||||
DownloadSource.DEFAULT: "tiiuae/Falcon-H1-1.5B-Deep-Instruct",
|
||||
DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-1.5B-Deep-Instruct",
|
||||
},
|
||||
"Falcon-H1-1.5B-Deep-Base": {
|
||||
DownloadSource.DEFAULT: "tiiuae/Falcon-H1-1.5B-Deep-Base",
|
||||
DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-1.5B-Deep-Base",
|
||||
},
|
||||
"Falcon-H1-3B-Instruct": {
|
||||
DownloadSource.DEFAULT: "tiiuae/Falcon-H1-3B-Instruct",
|
||||
DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-3B-Instruct",
|
||||
},
|
||||
"Falcon-H1-3B-Base": {
|
||||
DownloadSource.DEFAULT: "tiiuae/Falcon-H1-3B-Base",
|
||||
DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-3B-Base",
|
||||
},
|
||||
"Falcon-H1-7B-Instruct": {
|
||||
DownloadSource.DEFAULT: "tiiuae/Falcon-H1-7B-Instruct",
|
||||
DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-7B-Instruct",
|
||||
},
|
||||
"Falcon-H1-7B-Base": {
|
||||
DownloadSource.DEFAULT: "tiiuae/Falcon-H1-7B-Base",
|
||||
DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-7B-Base",
|
||||
},
|
||||
"Falcon-H1-34B-Instruct": {
|
||||
DownloadSource.DEFAULT: "tiiuae/Falcon-H1-34B-Instruct",
|
||||
DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-34B-Instruct",
|
||||
},
|
||||
"Falcon-H1-34B-Base": {
|
||||
DownloadSource.DEFAULT: "tiiuae/Falcon-H1-34B-Base",
|
||||
DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-34B-Base",
|
||||
},
|
||||
|
||||
"Falcon-H1-0.5B-Instruct": {
|
||||
DownloadSource.DEFAULT: "tiiuae/Falcon-H1-0.5B-Instruct",
|
||||
DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-0.5B-Instruct",
|
||||
},
|
||||
"Falcon-H1-1.5B-Instruct": {
|
||||
DownloadSource.DEFAULT: "tiiuae/Falcon-H1-1.5B-Instruct",
|
||||
DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-1.5B-Instruct",
|
||||
},
|
||||
"Falcon-H1-1.5B-Deep-Instruct": {
|
||||
DownloadSource.DEFAULT: "tiiuae/Falcon-H1-1.5B-Deep-Instruct",
|
||||
DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-1.5B-Deep-Instruct",
|
||||
},
|
||||
"Falcon-H1-3B-Instruct": {
|
||||
DownloadSource.DEFAULT: "tiiuae/Falcon-H1-3B-Instruct",
|
||||
DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-3B-Instruct",
|
||||
},
|
||||
"Falcon-H1-7B-Instruct": {
|
||||
DownloadSource.DEFAULT: "tiiuae/Falcon-H1-7B-Instruct",
|
||||
DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-7B-Instruct",
|
||||
},
|
||||
"Falcon-H1-34B-Instruct": {
|
||||
DownloadSource.DEFAULT: "tiiuae/Falcon-H1-34B-Instruct",
|
||||
DownloadSource.MODELSCOPE: "tiiuae/Falcon-H1-34B-Instruct",
|
||||
},
|
||||
},
|
||||
template="falcon_h1",
|
||||
)
|
||||
@ -1154,6 +1153,10 @@ register_model_group(
|
||||
DownloadSource.DEFAULT: "moonshotai/Kimi-VL-A3B-Thinking",
|
||||
DownloadSource.MODELSCOPE: "moonshotai/Kimi-VL-A3B-Thinking",
|
||||
},
|
||||
"Kimi-VL-A3B-Thinking-2506": {
|
||||
DownloadSource.DEFAULT: "moonshotai/Kimi-VL-A3B-Thinking-2506",
|
||||
DownloadSource.MODELSCOPE: "moonshotai/Kimi-VL-A3B-Thinking-2506",
|
||||
},
|
||||
},
|
||||
template="kimi_vl",
|
||||
multimodal=True,
|
||||
|
@ -80,7 +80,10 @@ def get_unsloth_peft_model(
|
||||
|
||||
|
||||
def load_unsloth_peft_model(
|
||||
config: "PretrainedConfig", model_args: "ModelArguments", finetuning_args: "FinetuningArguments", is_trainable: bool
|
||||
config: "PretrainedConfig",
|
||||
model_args: "ModelArguments",
|
||||
finetuning_args: "FinetuningArguments",
|
||||
is_trainable: bool,
|
||||
) -> "PreTrainedModel":
|
||||
r"""Load peft model with unsloth. Used in both training and inference."""
|
||||
from unsloth import FastLanguageModel # type: ignore
|
||||
|
Loading…
x
Reference in New Issue
Block a user