Mirror of https://github.com/hiyouga/LLaMA-Factory.git (synced 2025-11-04 09:52:14 +08:00)

[model] add qwen3 next (#9130)

parent 260b5625c3
commit 812720909e
@@ -276,7 +276,7 @@ Choose your path:
 | [Gemma 3/Gemma 3n](https://huggingface.co/google)                 | 270M/1B/4B/6B/8B/12B/27B         | gemma3/gemma3n      |
 | [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/zai-org)         | 9B/32B                           | glm4/glmz1          |
 | [GLM-4.1V](https://huggingface.co/zai-org)                        | 9B                               | glm4v               |
-| [GLM-4.5/GLM-4.5V](https://huggingface.co/zai-org)*               | 106B/355B                        | glm4_moe/glm4v_moe  |
+| [GLM-4.5/GLM-4.5V](https://huggingface.co/zai-org)                | 106B/355B                        | glm4_moe/glm4v_moe  |
 | [GPT-2](https://huggingface.co/openai-community)                  | 0.1B/0.4B/0.8B/1.5B              | -                   |
 | [GPT-OSS](https://huggingface.co/openai)                          | 20B/120B                         | gpt                 |
 | [Granite 3.0-3.3](https://huggingface.co/ibm-granite)             | 1B/2B/3B/8B                      | granite3            |
@@ -296,7 +296,7 @@ Choose your path:
 | [LLaVA-NeXT](https://huggingface.co/llava-hf)                     | 7B/8B/13B/34B/72B/110B           | llava_next          |
 | [LLaVA-NeXT-Video](https://huggingface.co/llava-hf)               | 7B/34B                           | llava_next_video    |
 | [MiMo](https://huggingface.co/XiaomiMiMo)                         | 7B                               | mimo                |
-| [MiniCPM](https://huggingface.co/openbmb)                         | 0.5B/1B/2B/4B/8B                 | cpm/cpm3/cpm4       |
+| [MiniCPM 1-4.1](https://huggingface.co/openbmb)                   | 0.5B/1B/2B/4B/8B                 | cpm/cpm3/cpm4       |
 | [MiniCPM-o-2.6/MiniCPM-V-2.6](https://huggingface.co/openbmb)     | 8B                               | minicpm_o/minicpm_v |
 | [Ministral/Mistral-Nemo](https://huggingface.co/mistralai)        | 8B/12B                           | ministral           |
 | [Mistral/Mixtral](https://huggingface.co/mistralai)               | 7B/8x7B/8x22B                    | mistral             |
@@ -309,11 +309,11 @@ Choose your path:
 | [Phi-4](https://huggingface.co/microsoft)                         | 14B                              | phi4                |
 | [Pixtral](https://huggingface.co/mistralai)                       | 12B                              | pixtral             |
 | [Qwen (1-2.5) (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen)   | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen                |
-| [Qwen3 (MoE/Instruct/Thinking)](https://huggingface.co/Qwen)      | 0.6B/1.7B/4B/8B/14B/32B/235B     | qwen3/qwen3_nothink |
+| [Qwen3 (MoE/Instruct/Thinking/Next)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/80B/235B | qwen3/qwen3_nothink |
 | [Qwen2-Audio](https://huggingface.co/Qwen)                        | 7B                               | qwen2_audio         |
 | [Qwen2.5-Omni](https://huggingface.co/Qwen)                       | 3B/7B                            | qwen2_omni          |
 | [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen)            | 2B/3B/7B/32B/72B                 | qwen2_vl            |
-| [Seed (Coder/OSS)](https://huggingface.co/ByteDance-Seed)         | 8B/36B                           | seed_coder/seed_oss |
+| [Seed (OSS/Coder)](https://huggingface.co/ByteDance-Seed)         | 8B/36B                           | seed_oss/seed_coder |
 | [Skywork o1](https://huggingface.co/Skywork)                      | 8B                               | skywork_o1          |
 | [StarCoder 2](https://huggingface.co/bigcode)                     | 3B/7B/15B                        | -                   |
 | [TeleChat2](https://huggingface.co/Tele-AI)                       | 3B/7B/35B/115B                   | telechat2           |
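For quick reference, the README rows touched by this commit pair each model family with its template identifier. The mapping below simply restates those changed rows as a plain Python dict for illustration; it is not how LLaMA-Factory stores its template registry.

```python
# Illustrative only: the "Template" column of the rows changed above, restated
# as a Python mapping so the new Qwen3 Next entry is easy to spot.
UPDATED_ROWS = {
    "GLM-4.5/GLM-4.5V": "glm4_moe/glm4v_moe",
    "MiniCPM 1-4.1": "cpm/cpm3/cpm4",
    "Qwen3 (MoE/Instruct/Thinking/Next)": "qwen3/qwen3_nothink",
    "Seed (OSS/Coder)": "seed_oss/seed_coder",
}

for family, template in UPDATED_ROWS.items():
    print(f"{family:36s} -> {template}")
```

The same four rows are updated in the second (localized) README below.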
@@ -278,7 +278,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
 | [Gemma 3/Gemma 3n](https://huggingface.co/google)                 | 270M/1B/4B/6B/8B/12B/27B         | gemma3/gemma3n      |
 | [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/zai-org)         | 9B/32B                           | glm4/glmz1          |
 | [GLM-4.1V](https://huggingface.co/zai-org)                        | 9B                               | glm4v               |
-| [GLM-4.5/GLM-4.5V](https://huggingface.co/zai-org)*               | 106B/355B                        | glm4_moe/glm4v_moe  |
+| [GLM-4.5/GLM-4.5V](https://huggingface.co/zai-org)                | 106B/355B                        | glm4_moe/glm4v_moe  |
 | [GPT-2](https://huggingface.co/openai-community)                  | 0.1B/0.4B/0.8B/1.5B              | -                   |
 | [GPT-OSS](https://huggingface.co/openai)                          | 20B/120B                         | gpt                 |
 | [Granite 3.0-3.3](https://huggingface.co/ibm-granite)             | 1B/2B/3B/8B                      | granite3            |
@@ -298,7 +298,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
 | [LLaVA-NeXT](https://huggingface.co/llava-hf)                     | 7B/8B/13B/34B/72B/110B           | llava_next          |
 | [LLaVA-NeXT-Video](https://huggingface.co/llava-hf)               | 7B/34B                           | llava_next_video    |
 | [MiMo](https://huggingface.co/XiaomiMiMo)                         | 7B                               | mimo                |
-| [MiniCPM](https://huggingface.co/openbmb)                         | 0.5B/1B/2B/4B/8B                 | cpm/cpm3/cpm4       |
+| [MiniCPM 1-4.1](https://huggingface.co/openbmb)                   | 0.5B/1B/2B/4B/8B                 | cpm/cpm3/cpm4       |
 | [MiniCPM-o-2.6/MiniCPM-V-2.6](https://huggingface.co/openbmb)     | 8B                               | minicpm_o/minicpm_v |
 | [Ministral/Mistral-Nemo](https://huggingface.co/mistralai)        | 8B/12B                           | ministral           |
 | [Mistral/Mixtral](https://huggingface.co/mistralai)               | 7B/8x7B/8x22B                    | mistral             |
@@ -311,11 +311,11 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
 | [Phi-4](https://huggingface.co/microsoft)                         | 14B                              | phi4                |
 | [Pixtral](https://huggingface.co/mistralai)                       | 12B                              | pixtral             |
 | [Qwen (1-2.5) (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen)   | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen                |
-| [Qwen3 (MoE/Instruct/Thinking)](https://huggingface.co/Qwen)      | 0.6B/1.7B/4B/8B/14B/32B/235B     | qwen3/qwen3_nothink |
+| [Qwen3 (MoE/Instruct/Thinking/Next)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/80B/235B | qwen3/qwen3_nothink |
 | [Qwen2-Audio](https://huggingface.co/Qwen)                        | 7B                               | qwen2_audio         |
 | [Qwen2.5-Omni](https://huggingface.co/Qwen)                       | 3B/7B                            | qwen2_omni          |
 | [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen)            | 2B/3B/7B/32B/72B                 | qwen2_vl            |
-| [Seed (Coder/OSS)](https://huggingface.co/ByteDance-Seed)         | 8B/36B                           | seed_coder/seed_oss |
+| [Seed (OSS/Coder)](https://huggingface.co/ByteDance-Seed)         | 8B/36B                           | seed_oss/seed_coder |
 | [Skywork o1](https://huggingface.co/Skywork)                      | 8B                               | skywork_o1          |
 | [StarCoder 2](https://huggingface.co/bigcode)                     | 3B/7B/15B                        | -                   |
 | [TeleChat2](https://huggingface.co/Tele-AI)                       | 3B/7B/35B/115B                   | telechat2           |
@@ -917,6 +917,18 @@ register_template(
 )
 
 
+# copied from chatml template
+register_template(
+    name="ernie",
+    format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n\n<|im_start|>assistant\n"]),
+    format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n\n"]),
+    format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n\n"]),
+    format_observation=StringFormatter(slots=["<|im_start|>tool\n{{content}}<|im_end|>\n\n<|im_start|>assistant\n"]),
+    default_system="<global_setting>\nthink_mode=True\n</global_setting>",
+    stop_words=["<|im_end|>"],
+)
+
+
 register_template(
     name="exaone",
     format_user=StringFormatter(slots=["[|user|]{{content}}\n[|assistant|]"]),
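The slot strings above fully determine the rendered prompt. The sketch below is standalone Python (it does not import LLaMA-Factory's StringFormatter); it concatenates the same slots for a single user turn with the template's default system prompt, so you can see the exact text the "ernie" template feeds the model.

```python
# Standalone illustration of the "ernie" template above; the slot strings are
# copied verbatim from the register_template call, nothing else is assumed.
DEFAULT_SYSTEM = "<global_setting>\nthink_mode=True\n</global_setting>"


def render_ernie_prompt(user_message: str, system: str = DEFAULT_SYSTEM) -> str:
    """Return the prompt for one user turn, ready for the assistant to continue."""
    system_part = f"<|im_start|>system\n{system}<|im_end|>\n\n"
    user_part = f"<|im_start|>user\n{user_message}<|im_end|>\n\n<|im_start|>assistant\n"
    return system_part + user_part


if __name__ == "__main__":
    print(render_ernie_prompt("What is 2 + 2?"))
    # Decoding should stop on "<|im_end|>", matching stop_words in the template.
```

Apart from the extra blank line after each `<|im_end|>` and the think_mode system prompt, this is the chatml layout, which is why the registration is marked "copied from chatml template".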
@@ -78,7 +78,6 @@ SEED_TOOL_PROMPT = (
     "lines</parameter>\n</function>\n</seed:tool_call>\n"
 )
 
-
 LING_TOOL_PROMPT = (
     "# Tools\n\nYou may call one or more functions to assist with the user query.\n\n"
     "You are provided with function signatures within <tools></tools> XML tags:\n<tools>{tool_text}"
@@ -601,6 +601,17 @@ register_model_group(
 )
 
 
+register_model_group(
+    models={
+        "ERNIE-4.5-21B-A3B-Thinking": {
+            DownloadSource.DEFAULT: "baidu/ERNIE-4.5-21B-A3B-Thinking",
+            DownloadSource.MODELSCOPE: "PaddlePaddle/ERNIE-4.5-21B-A3B-Thinking",
+        },
+    },
+    template="ernie",
+)
+
+
 register_model_group(
     models={
         "EXAONE-3.0-7.8B-Instruct": {
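Calls like the ERNIE entry above map a display name to one repository id per download hub plus a default chat template. As a rough mental model only, here is a toy version of such a registry and a lookup; the enum values and helper bodies are assumptions for illustration, not LLaMA-Factory's actual implementation.

```python
# Toy registry sketch -- NOT the real LLaMA-Factory code. It only mirrors the
# shape of the register_model_group(...) calls in this diff.
from enum import Enum


class DownloadSource(str, Enum):
    DEFAULT = "hf"     # assumed to mean the Hugging Face Hub
    MODELSCOPE = "ms"  # assumed to mean ModelScope


SUPPORTED_MODELS: dict[str, dict[DownloadSource, str]] = {}
DEFAULT_TEMPLATE: dict[str, str] = {}


def register_model_group(models: dict[str, dict[DownloadSource, str]], template: str | None = None) -> None:
    """Record each model's hub repos and, optionally, its default chat template."""
    for name, sources in models.items():
        SUPPORTED_MODELS[name] = sources
        if template is not None:
            DEFAULT_TEMPLATE[name] = template


register_model_group(
    models={
        "ERNIE-4.5-21B-A3B-Thinking": {
            DownloadSource.DEFAULT: "baidu/ERNIE-4.5-21B-A3B-Thinking",
            DownloadSource.MODELSCOPE: "PaddlePaddle/ERNIE-4.5-21B-A3B-Thinking",
        },
    },
    template="ernie",
)

print(DEFAULT_TEMPLATE["ERNIE-4.5-21B-A3B-Thinking"])  # -> ernie
print(SUPPORTED_MODELS["ERNIE-4.5-21B-A3B-Thinking"][DownloadSource.MODELSCOPE])
```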
@@ -1783,6 +1794,10 @@ register_model_group(
             DownloadSource.DEFAULT: "openbmb/MiniCPM4-8B",
             DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM4-8B",
         },
+        "MiniCPM4.1-8B-Chat": {
+            DownloadSource.DEFAULT: "openbmb/MiniCPM4.1-8B",
+            DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM4.1-8B",
+        },
     },
     template="cpm4",
 )
@@ -1790,7 +1805,7 @@ register_model_group(
 
 register_model_group(
     models={
-        "MiniCPM-o-2_6": {
+        "MiniCPM-o-2.6": {
             DownloadSource.DEFAULT: "openbmb/MiniCPM-o-2_6",
             DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM-o-2_6",
         },
@@ -1802,7 +1817,7 @@ register_model_group(
 
 register_model_group(
     models={
-        "MiniCPM-V-2_6": {
+        "MiniCPM-V-2.6": {
             DownloadSource.DEFAULT: "openbmb/MiniCPM-V-2_6",
             DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM-V-2_6",
         },
@@ -1826,7 +1841,7 @@ register_model_group(
 
 register_model_group(
     models={
-        "MiniCPM-V-4_5": {
+        "MiniCPM-V-4.5": {
             DownloadSource.DEFAULT: "openbmb/MiniCPM-V-4_5",
             DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM-V-4_5",
         },
@@ -1944,6 +1959,37 @@ register_model_group(
 )
 
 
+register_model_group(
+    models={
+        "MobileLLM-R1-140M-Base": {
+            DownloadSource.DEFAULT: "facebook/MobileLLM-R1-140M-base",
+            DownloadSource.MODELSCOPE: "facebook/MobileLLM-R1-140M-base",
+        },
+        "MobileLLM-R1-360M-Base": {
+            DownloadSource.DEFAULT: "facebook/MobileLLM-R1-360M-base",
+            DownloadSource.MODELSCOPE: "facebook/MobileLLM-R1-360M-base",
+        },
+        "MobileLLM-R1-950M-Base": {
+            DownloadSource.DEFAULT: "facebook/MobileLLM-R1-950M-base",
+            DownloadSource.MODELSCOPE: "facebook/MobileLLM-R1-950M-base",
+        },
+        "MobileLLM-R1-140M-Instruct": {
+            DownloadSource.DEFAULT: "facebook/MobileLLM-R1-140M",
+            DownloadSource.MODELSCOPE: "facebook/MobileLLM-R1-140M",
+        },
+        "MobileLLM-R1-360M-Instruct": {
+            DownloadSource.DEFAULT: "facebook/MobileLLM-R1-360M",
+            DownloadSource.MODELSCOPE: "facebook/MobileLLM-R1-360M",
+        },
+        "MobileLLM-R1-950M-Instruct": {
+            DownloadSource.DEFAULT: "facebook/MobileLLM-R1-950M",
+            DownloadSource.MODELSCOPE: "facebook/MobileLLM-R1-950M",
+        },
+    },
+    template="llama3",
+)
+
+
 register_model_group(
     models={
         "Moonlight-16B-A3B": {
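The MobileLLM-R1 checkpoints registered above are small causal LMs published under the facebook organization, with the instruct variants reusing the llama3 chat template. A hedged loading sketch with Hugging Face transformers follows; whether a given checkpoint loads depends on your transformers version, which this diff does not pin.

```python
# Minimal sketch, independent of LLaMA-Factory: load the smallest MobileLLM-R1
# base checkpoint from the group registered above and sample a completion.
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "facebook/MobileLLM-R1-140M-base"  # DownloadSource.DEFAULT entry above

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id, torch_dtype="auto")

inputs = tokenizer("def fibonacci(n):", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```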
@@ -2912,6 +2958,10 @@ register_model_group(
             DownloadSource.DEFAULT: "Qwen/Qwen3-235B-A22B-GPTQ-Int4",
             DownloadSource.MODELSCOPE: "Qwen/Qwen3-235B-A22B-GPTQ-Int4",
         },
+        "Qwen3-Next-80B-A3B-Thinking": {
+            DownloadSource.DEFAULT: "Qwen/Qwen3-Next-80B-A3B-Thinking",
+            DownloadSource.MODELSCOPE: "Qwen/Qwen3-Next-80B-A3B-Thinking",
+        },
     },
     template="qwen3",
 )
@@ -2931,6 +2981,10 @@ register_model_group(
             DownloadSource.DEFAULT: "Qwen/Qwen3-235B-A22B-Instruct-2507",
             DownloadSource.MODELSCOPE: "Qwen/Qwen3-235B-A22B-Instruct-2507",
         },
+        "Qwen3-Next-80B-A3B-Instruct": {
+            DownloadSource.DEFAULT: "Qwen/Qwen3-Next-80B-A3B-Instruct",
+            DownloadSource.MODELSCOPE: "Qwen/Qwen3-Next-80B-A3B-Instruct",
+        },
     },
     template="qwen3_nothink",
 )
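These two entries are the headline change of this commit: Qwen3-Next-80B-A3B-Thinking is registered under the qwen3 template and Qwen3-Next-80B-A3B-Instruct under qwen3_nothink. Outside of LLaMA-Factory, the same repositories can be exercised directly with transformers, as in the hedged sketch below; Qwen3-Next support requires a sufficiently recent transformers release, and the minimum version is not stated in this diff.

```python
# Hedged sketch (not from this repo): chat with the newly registered
# Qwen3-Next-80B-A3B-Instruct checkpoint via Hugging Face transformers.
# The 80B MoE model needs multiple GPUs; device_map="auto" shards it naively.
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "Qwen/Qwen3-Next-80B-A3B-Instruct"  # DownloadSource.DEFAULT above

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id, torch_dtype="auto", device_map="auto")

messages = [{"role": "user", "content": "Summarize mixture-of-experts routing in one sentence."}]
input_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
output_ids = model.generate(input_ids, max_new_tokens=128)
print(tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True))
```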
@@ -34,31 +34,36 @@ LOCALES = {
         "en": {
             "value": (
                 "<h3><center>Visit <a href='https://github.com/hiyouga/LLaMA-Factory' target='_blank'>"
-                "GitHub Page</a></center></h3>"
+                "GitHub Page</a> <a href='https://llamafactory.readthedocs.io/en/latest/' target='_blank'>"
+                "Documentation</a></center></h3>"
             ),
         },
         "ru": {
             "value": (
                 "<h3><center>Посетить <a href='https://github.com/hiyouga/LLaMA-Factory' target='_blank'>"
-                "страницу GitHub</a></center></h3>"
+                "страницу GitHub</a> <a href='https://llamafactory.readthedocs.io/en/latest/' target='_blank'>"
+                "Документацию</a></center></h3>"
             ),
         },
         "zh": {
             "value": (
                 "<h3><center>访问 <a href='https://github.com/hiyouga/LLaMA-Factory' target='_blank'>"
-                "GitHub 主页</a></center></h3>"
+                "GitHub 主页</a> <a href='https://llamafactory.readthedocs.io/zh-cn/latest/' target='_blank'>"
+                "官方文档</a></center></h3>"
             ),
         },
         "ko": {
             "value": (
                 "<h3><center><a href='https://github.com/hiyouga/LLaMA-Factory' target='_blank'>"
-                "GitHub 페이지</a>를 방문하세요.</center></h3>"
+                "GitHub 페이지</a> <a href='https://llamafactory.readthedocs.io/en/latest/' target='_blank'>"
+                "공식 문서</a>를 방문하세요.</center></h3>"
             ),
         },
         "ja": {
             "value": (
                 "<h3><center><a href='https://github.com/hiyouga/LLaMA-Factory' target='_blank'>"
-                "GitHub ページ</a>にアクセスする</center></h3>"
+                "GitHub ページ</a> <a href='https://llamafactory.readthedocs.io/en/latest/' target='_blank'>"
+                "ドキュメント</a>にアクセスする</center></h3>"
             ),
         },
     },
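Each LOCALES entry above now appends a readthedocs link next to the GitHub link. The value strings are plain HTML meant for an HTML component in the Gradio web UI; a minimal sketch of that usage follows, assuming only that Gradio is installed (this is not the actual webui code, and the variable name is made up).

```python
# Minimal Gradio sketch showing how a localized banner string like the ones in
# LOCALES above can be rendered.
import gradio as gr

EN_BANNER = (
    "<h3><center>Visit <a href='https://github.com/hiyouga/LLaMA-Factory' target='_blank'>"
    "GitHub Page</a> <a href='https://llamafactory.readthedocs.io/en/latest/' target='_blank'>"
    "Documentation</a></center></h3>"
)

with gr.Blocks() as demo:
    gr.HTML(value=EN_BANNER)  # the "value" key in each locale maps onto this kwarg

if __name__ == "__main__":
    demo.launch()
```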