[model] add qwen3 next (#9130)

This commit is contained in:
Yaowei Zheng 2025-09-14 03:16:25 +08:00 committed by GitHub
parent 5d89af9e58
commit 28a625bf5b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 87 additions and 17 deletions

View File

@ -276,7 +276,7 @@ Choose your path:
| [Gemma 3/Gemma 3n](https://huggingface.co/google) | 270M/1B/4B/6B/8B/12B/27B | gemma3/gemma3n | | [Gemma 3/Gemma 3n](https://huggingface.co/google) | 270M/1B/4B/6B/8B/12B/27B | gemma3/gemma3n |
| [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/zai-org) | 9B/32B | glm4/glmz1 | | [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/zai-org) | 9B/32B | glm4/glmz1 |
| [GLM-4.1V](https://huggingface.co/zai-org) | 9B | glm4v | | [GLM-4.1V](https://huggingface.co/zai-org) | 9B | glm4v |
| [GLM-4.5/GLM-4.5V](https://huggingface.co/zai-org)* | 106B/355B | glm4_moe/glm4v_moe | | [GLM-4.5/GLM-4.5V](https://huggingface.co/zai-org) | 106B/355B | glm4_moe/glm4v_moe |
| [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - | | [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - |
| [GPT-OSS](https://huggingface.co/openai) | 20B/120B | gpt | | [GPT-OSS](https://huggingface.co/openai) | 20B/120B | gpt |
| [Granite 3.0-3.3](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 | | [Granite 3.0-3.3](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 |
@ -296,7 +296,7 @@ Choose your path:
| [LLaVA-NeXT](https://huggingface.co/llava-hf) | 7B/8B/13B/34B/72B/110B | llava_next | | [LLaVA-NeXT](https://huggingface.co/llava-hf) | 7B/8B/13B/34B/72B/110B | llava_next |
| [LLaVA-NeXT-Video](https://huggingface.co/llava-hf) | 7B/34B | llava_next_video | | [LLaVA-NeXT-Video](https://huggingface.co/llava-hf) | 7B/34B | llava_next_video |
| [MiMo](https://huggingface.co/XiaomiMiMo) | 7B | mimo | | [MiMo](https://huggingface.co/XiaomiMiMo) | 7B | mimo |
| [MiniCPM](https://huggingface.co/openbmb) | 0.5B/1B/2B/4B/8B | cpm/cpm3/cpm4 | | [MiniCPM 1-4.1](https://huggingface.co/openbmb) | 0.5B/1B/2B/4B/8B | cpm/cpm3/cpm4 |
| [MiniCPM-o-2.6/MiniCPM-V-2.6](https://huggingface.co/openbmb) | 8B | minicpm_o/minicpm_v | | [MiniCPM-o-2.6/MiniCPM-V-2.6](https://huggingface.co/openbmb) | 8B | minicpm_o/minicpm_v |
| [Ministral/Mistral-Nemo](https://huggingface.co/mistralai) | 8B/12B | ministral | | [Ministral/Mistral-Nemo](https://huggingface.co/mistralai) | 8B/12B | ministral |
| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral | | [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
@ -309,11 +309,11 @@ Choose your path:
| [Phi-4](https://huggingface.co/microsoft) | 14B | phi4 | | [Phi-4](https://huggingface.co/microsoft) | 14B | phi4 |
| [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral | | [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral |
| [Qwen (1-2.5) (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen | | [Qwen (1-2.5) (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
| [Qwen3 (MoE/Instruct/Thinking)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/235B | qwen3/qwen3_nothink | | [Qwen3 (MoE/Instruct/Thinking/Next)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/80B/235B | qwen3/qwen3_nothink |
| [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio | | [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio |
| [Qwen2.5-Omni](https://huggingface.co/Qwen) | 3B/7B | qwen2_omni | | [Qwen2.5-Omni](https://huggingface.co/Qwen) | 3B/7B | qwen2_omni |
| [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl | | [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl |
| [Seed (Coder/OSS)](https://huggingface.co/ByteDance-Seed) | 8B/36B | seed_coder/seed_oss | | [Seed (OSS/Coder)](https://huggingface.co/ByteDance-Seed) | 8B/36B | seed_oss/seed_coder |
| [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 | | [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 |
| [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - | | [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
| [TeleChat2](https://huggingface.co/Tele-AI) | 3B/7B/35B/115B | telechat2 | | [TeleChat2](https://huggingface.co/Tele-AI) | 3B/7B/35B/115B | telechat2 |

View File

@ -278,7 +278,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
| [Gemma 3/Gemma 3n](https://huggingface.co/google) | 270M/1B/4B/6B/8B/12B/27B | gemma3/gemma3n | | [Gemma 3/Gemma 3n](https://huggingface.co/google) | 270M/1B/4B/6B/8B/12B/27B | gemma3/gemma3n |
| [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/zai-org) | 9B/32B | glm4/glmz1 | | [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/zai-org) | 9B/32B | glm4/glmz1 |
| [GLM-4.1V](https://huggingface.co/zai-org) | 9B | glm4v | | [GLM-4.1V](https://huggingface.co/zai-org) | 9B | glm4v |
| [GLM-4.5/GLM-4.5V](https://huggingface.co/zai-org)* | 106B/355B | glm4_moe/glm4v_moe | | [GLM-4.5/GLM-4.5V](https://huggingface.co/zai-org) | 106B/355B | glm4_moe/glm4v_moe |
| [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - | | [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - |
| [GPT-OSS](https://huggingface.co/openai) | 20B/120B | gpt | | [GPT-OSS](https://huggingface.co/openai) | 20B/120B | gpt |
| [Granite 3.0-3.3](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 | | [Granite 3.0-3.3](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 |
@ -298,7 +298,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
| [LLaVA-NeXT](https://huggingface.co/llava-hf) | 7B/8B/13B/34B/72B/110B | llava_next | | [LLaVA-NeXT](https://huggingface.co/llava-hf) | 7B/8B/13B/34B/72B/110B | llava_next |
| [LLaVA-NeXT-Video](https://huggingface.co/llava-hf) | 7B/34B | llava_next_video | | [LLaVA-NeXT-Video](https://huggingface.co/llava-hf) | 7B/34B | llava_next_video |
| [MiMo](https://huggingface.co/XiaomiMiMo) | 7B | mimo | | [MiMo](https://huggingface.co/XiaomiMiMo) | 7B | mimo |
| [MiniCPM](https://huggingface.co/openbmb) | 0.5B/1B/2B/4B/8B | cpm/cpm3/cpm4 | | [MiniCPM 1-4.1](https://huggingface.co/openbmb) | 0.5B/1B/2B/4B/8B | cpm/cpm3/cpm4 |
| [MiniCPM-o-2.6/MiniCPM-V-2.6](https://huggingface.co/openbmb) | 8B | minicpm_o/minicpm_v | | [MiniCPM-o-2.6/MiniCPM-V-2.6](https://huggingface.co/openbmb) | 8B | minicpm_o/minicpm_v |
| [Ministral/Mistral-Nemo](https://huggingface.co/mistralai) | 8B/12B | ministral | | [Ministral/Mistral-Nemo](https://huggingface.co/mistralai) | 8B/12B | ministral |
| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral | | [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
@ -311,11 +311,11 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
| [Phi-4](https://huggingface.co/microsoft) | 14B | phi4 | | [Phi-4](https://huggingface.co/microsoft) | 14B | phi4 |
| [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral | | [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral |
| [Qwen (1-2.5) (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen | | [Qwen (1-2.5) (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
| [Qwen3 (MoE/Instruct/Thinking)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/235B | qwen3/qwen3_nothink | | [Qwen3 (MoE/Instruct/Thinking/Next)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/80B/235B | qwen3/qwen3_nothink |
| [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio | | [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio |
| [Qwen2.5-Omni](https://huggingface.co/Qwen) | 3B/7B | qwen2_omni | | [Qwen2.5-Omni](https://huggingface.co/Qwen) | 3B/7B | qwen2_omni |
| [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl | | [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl |
| [Seed (Coder/OSS)](https://huggingface.co/ByteDance-Seed) | 8B/36B | seed_coder/seed_oss | | [Seed (OSS/Coder)](https://huggingface.co/ByteDance-Seed) | 8B/36B | seed_oss/seed_coder |
| [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 | | [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 |
| [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - | | [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
| [TeleChat2](https://huggingface.co/Tele-AI) | 3B/7B/35B/115B | telechat2 | | [TeleChat2](https://huggingface.co/Tele-AI) | 3B/7B/35B/115B | telechat2 |

View File

@ -917,6 +917,18 @@ register_template(
) )
# copied from chatml template
register_template(
name="ernie",
format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n\n<|im_start|>assistant\n"]),
format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n\n"]),
format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n\n"]),
format_observation=StringFormatter(slots=["<|im_start|>tool\n{{content}}<|im_end|>\n\n<|im_start|>assistant\n"]),
default_system="<global_setting>\nthink_mode=True\n</global_setting>",
stop_words=["<|im_end|>"],
)
register_template( register_template(
name="exaone", name="exaone",
format_user=StringFormatter(slots=["[|user|]{{content}}\n[|assistant|]"]), format_user=StringFormatter(slots=["[|user|]{{content}}\n[|assistant|]"]),

View File

@ -78,7 +78,6 @@ SEED_TOOL_PROMPT = (
"lines</parameter>\n</function>\n</seed:tool_call>\n" "lines</parameter>\n</function>\n</seed:tool_call>\n"
) )
LING_TOOL_PROMPT = ( LING_TOOL_PROMPT = (
"# Tools\n\nYou may call one or more functions to assist with the user query.\n\n" "# Tools\n\nYou may call one or more functions to assist with the user query.\n\n"
"You are provided with function signatures within <tools></tools> XML tags:\n<tools>{tool_text}" "You are provided with function signatures within <tools></tools> XML tags:\n<tools>{tool_text}"

View File

@ -601,6 +601,17 @@ register_model_group(
) )
register_model_group(
models={
"ERNIE-4.5-21B-A3B-Thinking": {
DownloadSource.DEFAULT: "baidu/ERNIE-4.5-21B-A3B-Thinking",
DownloadSource.MODELSCOPE: "PaddlePaddle/ERNIE-4.5-21B-A3B-Thinking",
},
},
template="ernie",
)
register_model_group( register_model_group(
models={ models={
"EXAONE-3.0-7.8B-Instruct": { "EXAONE-3.0-7.8B-Instruct": {
@ -1783,6 +1794,10 @@ register_model_group(
DownloadSource.DEFAULT: "openbmb/MiniCPM4-8B", DownloadSource.DEFAULT: "openbmb/MiniCPM4-8B",
DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM4-8B", DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM4-8B",
}, },
"MiniCPM4.1-8B-Chat": {
DownloadSource.DEFAULT: "openbmb/MiniCPM4.1-8B",
DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM4.1-8B",
},
}, },
template="cpm4", template="cpm4",
) )
@ -1790,7 +1805,7 @@ register_model_group(
register_model_group( register_model_group(
models={ models={
"MiniCPM-o-2_6": { "MiniCPM-o-2.6": {
DownloadSource.DEFAULT: "openbmb/MiniCPM-o-2_6", DownloadSource.DEFAULT: "openbmb/MiniCPM-o-2_6",
DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM-o-2_6", DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM-o-2_6",
}, },
@ -1802,7 +1817,7 @@ register_model_group(
register_model_group( register_model_group(
models={ models={
"MiniCPM-V-2_6": { "MiniCPM-V-2.6": {
DownloadSource.DEFAULT: "openbmb/MiniCPM-V-2_6", DownloadSource.DEFAULT: "openbmb/MiniCPM-V-2_6",
DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM-V-2_6", DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM-V-2_6",
}, },
@ -1826,7 +1841,7 @@ register_model_group(
register_model_group( register_model_group(
models={ models={
"MiniCPM-V-4_5": { "MiniCPM-V-4.5": {
DownloadSource.DEFAULT: "openbmb/MiniCPM-V-4_5", DownloadSource.DEFAULT: "openbmb/MiniCPM-V-4_5",
DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM-V-4_5", DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM-V-4_5",
}, },
@ -1944,6 +1959,37 @@ register_model_group(
) )
register_model_group(
models={
"MobileLLM-R1-140M-Base": {
DownloadSource.DEFAULT: "facebook/MobileLLM-R1-140M-base",
DownloadSource.MODELSCOPE: "facebook/MobileLLM-R1-140M-base",
},
"MobileLLM-R1-360M-Base": {
DownloadSource.DEFAULT: "facebook/MobileLLM-R1-360M-base",
DownloadSource.MODELSCOPE: "facebook/MobileLLM-R1-360M-base",
},
"MobileLLM-R1-950M-Base": {
DownloadSource.DEFAULT: "facebook/MobileLLM-R1-950M-base",
DownloadSource.MODELSCOPE: "facebook/MobileLLM-R1-950M-base",
},
"MobileLLM-R1-140M-Instruct": {
DownloadSource.DEFAULT: "facebook/MobileLLM-R1-140M",
DownloadSource.MODELSCOPE: "facebook/MobileLLM-R1-140M",
},
"MobileLLM-R1-360M-Instruct": {
DownloadSource.DEFAULT: "facebook/MobileLLM-R1-360M",
DownloadSource.MODELSCOPE: "facebook/MobileLLM-R1-360M",
},
"MobileLLM-R1-950M-Instruct": {
DownloadSource.DEFAULT: "facebook/MobileLLM-R1-950M",
DownloadSource.MODELSCOPE: "facebook/MobileLLM-R1-950M",
},
},
template="llama3",
)
register_model_group( register_model_group(
models={ models={
"Moonlight-16B-A3B": { "Moonlight-16B-A3B": {
@ -2912,6 +2958,10 @@ register_model_group(
DownloadSource.DEFAULT: "Qwen/Qwen3-235B-A22B-GPTQ-Int4", DownloadSource.DEFAULT: "Qwen/Qwen3-235B-A22B-GPTQ-Int4",
DownloadSource.MODELSCOPE: "Qwen/Qwen3-235B-A22B-GPTQ-Int4", DownloadSource.MODELSCOPE: "Qwen/Qwen3-235B-A22B-GPTQ-Int4",
}, },
"Qwen/Qwen3-Next-80B-A3B-Thinking": {
DownloadSource.DEFAULT: "Qwen/Qwen3-Next-80B-A3B-Thinking",
DownloadSource.MODELSCOPE: "Qwen/Qwen3-Next-80B-A3B-Thinking",
},
}, },
template="qwen3", template="qwen3",
) )
@ -2931,6 +2981,10 @@ register_model_group(
DownloadSource.DEFAULT: "Qwen/Qwen3-235B-A22B-Instruct-2507", DownloadSource.DEFAULT: "Qwen/Qwen3-235B-A22B-Instruct-2507",
DownloadSource.MODELSCOPE: "Qwen/Qwen3-235B-A22B-Instruct-2507", DownloadSource.MODELSCOPE: "Qwen/Qwen3-235B-A22B-Instruct-2507",
}, },
"Qwen3-Next-80B-A3B-Instruct": {
DownloadSource.DEFAULT: "Qwen/Qwen3-Next-80B-A3B-Instruct",
DownloadSource.MODELSCOPE: "Qwen/Qwen3-Next-80B-A3B-Instruct",
},
}, },
template="qwen3_nothink", template="qwen3_nothink",
) )

View File

@ -34,31 +34,36 @@ LOCALES = {
"en": { "en": {
"value": ( "value": (
"<h3><center>Visit <a href='https://github.com/hiyouga/LLaMA-Factory' target='_blank'>" "<h3><center>Visit <a href='https://github.com/hiyouga/LLaMA-Factory' target='_blank'>"
"GitHub Page</a></center></h3>" "GitHub Page</a> <a href='https://llamafactory.readthedocs.io/en/latest/' target='_blank'>"
"Documentation</a></center></h3>"
), ),
}, },
"ru": { "ru": {
"value": ( "value": (
"<h3><center>Посетить <a href='https://github.com/hiyouga/LLaMA-Factory' target='_blank'>" "<h3><center>Посетить <a href='https://github.com/hiyouga/LLaMA-Factory' target='_blank'>"
"страницу GitHub</a></center></h3>" "страницу GitHub</a> <a href='https://llamafactory.readthedocs.io/en/latest/' target='_blank'>"
"Документацию</a></center></h3>"
), ),
}, },
"zh": { "zh": {
"value": ( "value": (
"<h3><center>访问 <a href='https://github.com/hiyouga/LLaMA-Factory' target='_blank'>" "<h3><center>访问 <a href='https://github.com/hiyouga/LLaMA-Factory' target='_blank'>"
"GitHub 主页</a></center></h3>" "GitHub 主页</a> <a href='https://llamafactory.readthedocs.io/zh-cn/latest/' target='_blank'>"
"官方文档</a></center></h3>"
), ),
}, },
"ko": { "ko": {
"value": ( "value": (
"<h3><center><a href='https://github.com/hiyouga/LLaMA-Factory' target='_blank'>" "<h3><center><a href='https://github.com/hiyouga/LLaMA-Factory' target='_blank'>"
"GitHub 페이지</a>를 방문하세요.</center></h3>" "GitHub 페이지</a> <a href='https://llamafactory.readthedocs.io/en/latest/' target='_blank'>"
"공식 문서</a>를 방문하세요.</center></h3>"
), ),
}, },
"ja": { "ja": {
"value": ( "value": (
"<h3><center><a href='https://github.com/hiyouga/LLaMA-Factory' target='_blank'>" "<h3><center><a href='https://github.com/hiyouga/LLaMA-Factory' target='_blank'>"
"GitHub ページ</a>にアクセスする</center></h3>" "GitHub ページ</a> <a href='https://llamafactory.readthedocs.io/en/latest/' target='_blank'>"
"ドキュメント</a>にアクセスする</center></h3>"
), ),
}, },
}, },