From 28a625bf5bbcbc02a483076a96e94d0f0103fb64 Mon Sep 17 00:00:00 2001 From: Yaowei Zheng Date: Sun, 14 Sep 2025 03:16:25 +0800 Subject: [PATCH] [model] add qwen3 next (#9130) --- README.md | 8 ++-- README_zh.md | 8 ++-- src/llamafactory/data/template.py | 12 ++++++ src/llamafactory/data/tool_utils.py | 1 - src/llamafactory/extras/constants.py | 60 ++++++++++++++++++++++++++-- src/llamafactory/webui/locales.py | 15 ++++--- 6 files changed, 87 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 906a9b36..9dcdbaf8 100644 --- a/README.md +++ b/README.md @@ -276,7 +276,7 @@ Choose your path: | [Gemma 3/Gemma 3n](https://huggingface.co/google) | 270M/1B/4B/6B/8B/12B/27B | gemma3/gemma3n | | [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/zai-org) | 9B/32B | glm4/glmz1 | | [GLM-4.1V](https://huggingface.co/zai-org) | 9B | glm4v | -| [GLM-4.5/GLM-4.5V](https://huggingface.co/zai-org)* | 106B/355B | glm4_moe/glm4v_moe | +| [GLM-4.5/GLM-4.5V](https://huggingface.co/zai-org) | 106B/355B | glm4_moe/glm4v_moe | | [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - | | [GPT-OSS](https://huggingface.co/openai) | 20B/120B | gpt | | [Granite 3.0-3.3](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 | @@ -296,7 +296,7 @@ Choose your path: | [LLaVA-NeXT](https://huggingface.co/llava-hf) | 7B/8B/13B/34B/72B/110B | llava_next | | [LLaVA-NeXT-Video](https://huggingface.co/llava-hf) | 7B/34B | llava_next_video | | [MiMo](https://huggingface.co/XiaomiMiMo) | 7B | mimo | -| [MiniCPM](https://huggingface.co/openbmb) | 0.5B/1B/2B/4B/8B | cpm/cpm3/cpm4 | +| [MiniCPM 1-4.1](https://huggingface.co/openbmb) | 0.5B/1B/2B/4B/8B | cpm/cpm3/cpm4 | | [MiniCPM-o-2.6/MiniCPM-V-2.6](https://huggingface.co/openbmb) | 8B | minicpm_o/minicpm_v | | [Ministral/Mistral-Nemo](https://huggingface.co/mistralai) | 8B/12B | ministral | | [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral | @@ -309,11 +309,11 @@ Choose your path: | [Phi-4](https://huggingface.co/microsoft) | 14B | phi4 | | [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral | | [Qwen (1-2.5) (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen | -| [Qwen3 (MoE/Instruct/Thinking)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/235B | qwen3/qwen3_nothink | +| [Qwen3 (MoE/Instruct/Thinking/Next)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/80B/235B | qwen3/qwen3_nothink | | [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio | | [Qwen2.5-Omni](https://huggingface.co/Qwen) | 3B/7B | qwen2_omni | | [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl | -| [Seed (Coder/OSS)](https://huggingface.co/ByteDance-Seed) | 8B/36B | seed_coder/seed_oss | +| [Seed (OSS/Coder)](https://huggingface.co/ByteDance-Seed) | 8B/36B | seed_oss/seed_coder | | [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 | | [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - | | [TeleChat2](https://huggingface.co/Tele-AI) | 3B/7B/35B/115B | telechat2 | diff --git a/README_zh.md b/README_zh.md index 10621890..0ea70be2 100644 --- a/README_zh.md +++ b/README_zh.md @@ -278,7 +278,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc | [Gemma 3/Gemma 3n](https://huggingface.co/google) | 270M/1B/4B/6B/8B/12B/27B | gemma3/gemma3n | | [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/zai-org) | 9B/32B | glm4/glmz1 | | [GLM-4.1V](https://huggingface.co/zai-org) | 9B | glm4v | -| [GLM-4.5/GLM-4.5V](https://huggingface.co/zai-org)* | 106B/355B | glm4_moe/glm4v_moe | +| [GLM-4.5/GLM-4.5V](https://huggingface.co/zai-org) | 106B/355B | glm4_moe/glm4v_moe | | [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - | | [GPT-OSS](https://huggingface.co/openai) | 20B/120B | gpt | | [Granite 3.0-3.3](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 | @@ -298,7 +298,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc | [LLaVA-NeXT](https://huggingface.co/llava-hf) | 7B/8B/13B/34B/72B/110B | llava_next | | [LLaVA-NeXT-Video](https://huggingface.co/llava-hf) | 7B/34B | llava_next_video | | [MiMo](https://huggingface.co/XiaomiMiMo) | 7B | mimo | -| [MiniCPM](https://huggingface.co/openbmb) | 0.5B/1B/2B/4B/8B | cpm/cpm3/cpm4 | +| [MiniCPM 1-4.1](https://huggingface.co/openbmb) | 0.5B/1B/2B/4B/8B | cpm/cpm3/cpm4 | | [MiniCPM-o-2.6/MiniCPM-V-2.6](https://huggingface.co/openbmb) | 8B | minicpm_o/minicpm_v | | [Ministral/Mistral-Nemo](https://huggingface.co/mistralai) | 8B/12B | ministral | | [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral | @@ -311,11 +311,11 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc | [Phi-4](https://huggingface.co/microsoft) | 14B | phi4 | | [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral | | [Qwen (1-2.5) (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen | -| [Qwen3 (MoE/Instruct/Thinking)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/235B | qwen3/qwen3_nothink | +| [Qwen3 (MoE/Instruct/Thinking/Next)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/80B/235B | qwen3/qwen3_nothink | | [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio | | [Qwen2.5-Omni](https://huggingface.co/Qwen) | 3B/7B | qwen2_omni | | [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl | -| [Seed (Coder/OSS)](https://huggingface.co/ByteDance-Seed) | 8B/36B | seed_coder/seed_oss | +| [Seed (OSS/Coder)](https://huggingface.co/ByteDance-Seed) | 8B/36B | seed_oss/seed_coder | | [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 | | [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - | | [TeleChat2](https://huggingface.co/Tele-AI) | 3B/7B/35B/115B | telechat2 | diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index eababd7b..596e7006 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -917,6 +917,18 @@ register_template( ) +# copied from chatml template +register_template( + name="ernie", + format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n\n<|im_start|>assistant\n"]), + format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n\n"]), + format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n\n"]), + format_observation=StringFormatter(slots=["<|im_start|>tool\n{{content}}<|im_end|>\n\n<|im_start|>assistant\n"]), + default_system="\nthink_mode=True\n", + stop_words=["<|im_end|>"], +) + + register_template( name="exaone", format_user=StringFormatter(slots=["[|user|]{{content}}\n[|assistant|]"]), diff --git a/src/llamafactory/data/tool_utils.py b/src/llamafactory/data/tool_utils.py index 3e059d87..2f677f1e 100644 --- a/src/llamafactory/data/tool_utils.py +++ b/src/llamafactory/data/tool_utils.py @@ -78,7 +78,6 @@ SEED_TOOL_PROMPT = ( "lines\n\n\n" ) - LING_TOOL_PROMPT = ( "# Tools\n\nYou may call one or more functions to assist with the user query.\n\n" "You are provided with function signatures within XML tags:\n{tool_text}" diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index afd46737..9f87f60b 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -601,6 +601,17 @@ register_model_group( ) +register_model_group( + models={ + "ERNIE-4.5-21B-A3B-Thinking": { + DownloadSource.DEFAULT: "baidu/ERNIE-4.5-21B-A3B-Thinking", + DownloadSource.MODELSCOPE: "PaddlePaddle/ERNIE-4.5-21B-A3B-Thinking", + }, + }, + template="ernie", +) + + register_model_group( models={ "EXAONE-3.0-7.8B-Instruct": { @@ -1783,6 +1794,10 @@ register_model_group( DownloadSource.DEFAULT: "openbmb/MiniCPM4-8B", DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM4-8B", }, + "MiniCPM4.1-8B-Chat": { + DownloadSource.DEFAULT: "openbmb/MiniCPM4.1-8B", + DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM4.1-8B", + }, }, template="cpm4", ) @@ -1790,7 +1805,7 @@ register_model_group( register_model_group( models={ - "MiniCPM-o-2_6": { + "MiniCPM-o-2.6": { DownloadSource.DEFAULT: "openbmb/MiniCPM-o-2_6", DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM-o-2_6", }, @@ -1802,7 +1817,7 @@ register_model_group( register_model_group( models={ - "MiniCPM-V-2_6": { + "MiniCPM-V-2.6": { DownloadSource.DEFAULT: "openbmb/MiniCPM-V-2_6", DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM-V-2_6", }, @@ -1826,7 +1841,7 @@ register_model_group( register_model_group( models={ - "MiniCPM-V-4_5": { + "MiniCPM-V-4.5": { DownloadSource.DEFAULT: "openbmb/MiniCPM-V-4_5", DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM-V-4_5", }, @@ -1944,6 +1959,37 @@ register_model_group( ) +register_model_group( + models={ + "MobileLLM-R1-140M-Base": { + DownloadSource.DEFAULT: "facebook/MobileLLM-R1-140M-base", + DownloadSource.MODELSCOPE: "facebook/MobileLLM-R1-140M-base", + }, + "MobileLLM-R1-360M-Base": { + DownloadSource.DEFAULT: "facebook/MobileLLM-R1-360M-base", + DownloadSource.MODELSCOPE: "facebook/MobileLLM-R1-360M-base", + }, + "MobileLLM-R1-950M-Base": { + DownloadSource.DEFAULT: "facebook/MobileLLM-R1-950M-base", + DownloadSource.MODELSCOPE: "facebook/MobileLLM-R1-950M-base", + }, + "MobileLLM-R1-140M-Instruct": { + DownloadSource.DEFAULT: "facebook/MobileLLM-R1-140M", + DownloadSource.MODELSCOPE: "facebook/MobileLLM-R1-140M", + }, + "MobileLLM-R1-360M-Instruct": { + DownloadSource.DEFAULT: "facebook/MobileLLM-R1-360M", + DownloadSource.MODELSCOPE: "facebook/MobileLLM-R1-360M", + }, + "MobileLLM-R1-950M-Instruct": { + DownloadSource.DEFAULT: "facebook/MobileLLM-R1-950M", + DownloadSource.MODELSCOPE: "facebook/MobileLLM-R1-950M", + }, + }, + template="llama3", +) + + register_model_group( models={ "Moonlight-16B-A3B": { @@ -2912,6 +2958,10 @@ register_model_group( DownloadSource.DEFAULT: "Qwen/Qwen3-235B-A22B-GPTQ-Int4", DownloadSource.MODELSCOPE: "Qwen/Qwen3-235B-A22B-GPTQ-Int4", }, + "Qwen/Qwen3-Next-80B-A3B-Thinking": { + DownloadSource.DEFAULT: "Qwen/Qwen3-Next-80B-A3B-Thinking", + DownloadSource.MODELSCOPE: "Qwen/Qwen3-Next-80B-A3B-Thinking", + }, }, template="qwen3", ) @@ -2931,6 +2981,10 @@ register_model_group( DownloadSource.DEFAULT: "Qwen/Qwen3-235B-A22B-Instruct-2507", DownloadSource.MODELSCOPE: "Qwen/Qwen3-235B-A22B-Instruct-2507", }, + "Qwen3-Next-80B-A3B-Instruct": { + DownloadSource.DEFAULT: "Qwen/Qwen3-Next-80B-A3B-Instruct", + DownloadSource.MODELSCOPE: "Qwen/Qwen3-Next-80B-A3B-Instruct", + }, }, template="qwen3_nothink", ) diff --git a/src/llamafactory/webui/locales.py b/src/llamafactory/webui/locales.py index a23a4cea..7051b30e 100644 --- a/src/llamafactory/webui/locales.py +++ b/src/llamafactory/webui/locales.py @@ -34,31 +34,36 @@ LOCALES = { "en": { "value": ( "

Visit " - "GitHub Page

" + "GitHub Page " + "Documentation" ), }, "ru": { "value": ( "

Посетить " - "страницу GitHub

" + "страницу GitHub " + "Документацию" ), }, "zh": { "value": ( "

访问 " - "GitHub 主页

" + "GitHub 主页 " + "官方文档" ), }, "ko": { "value": ( "

" - "GitHub 페이지를 방문하세요.

" + "GitHub 페이지 " + "공식 문서를 방문하세요." ), }, "ja": { "value": ( "

" - "GitHub ページにアクセスする

" + "GitHub ページ " + "ドキュメントにアクセスする" ), }, },