diff --git a/README.md b/README.md index f4ce635a..6342c2e0 100644 --- a/README.md +++ b/README.md @@ -268,8 +268,8 @@ Choose your path: | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma/gemma2 | | [Gemma 3/Gemma 3n](https://huggingface.co/google) | 1B/4B/6B/8B/12B/27B | gemma3/gemma3n | | [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/zai-org) | 9B/32B | glm4/glmz1 | -| [GLM-4.1V](https://huggingface.co/zai-org)* | 9B | glm4v | -| [GLM-4.5](https://huggingface.co/zai-org)* | 106B/355B | glm4_moe | +| [GLM-4.1V](https://huggingface.co/zai-org) | 9B | glm4v | +| [GLM-4.5/GLM-4.5V](https://huggingface.co/zai-org)* | 106B/355B | glm4_moe/glm4v_moe | | [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - | | [GPT-OSS](https://huggingface.co/openai) | 20B/120B | gpt | | [Granite 3.0-3.3](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 | @@ -301,7 +301,7 @@ Choose your path: | [Phi-4](https://huggingface.co/microsoft) | 14B | phi4 | | [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral | | [Qwen (1-2.5) (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen | -| [Qwen3 (MoE)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/235B | qwen3 | +| [Qwen3 (MoE/Instruct/Thinking)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/235B | qwen3/qwen3_nothink | | [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio | | [Qwen2.5-Omni](https://huggingface.co/Qwen) | 3B/7B | qwen2_omni | | [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl | diff --git a/README_zh.md b/README_zh.md index e970255c..18a4e74b 100644 --- a/README_zh.md +++ b/README_zh.md @@ -270,8 +270,8 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma/gemma2 | | [Gemma 3/Gemma 3n](https://huggingface.co/google) | 1B/4B/6B/8B/12B/27B | gemma3/gemma3n | | [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/zai-org) | 9B/32B | glm4/glmz1 | -| [GLM-4.1V](https://huggingface.co/zai-org)* | 9B | glm4v | -| [GLM-4.5](https://huggingface.co/zai-org)* | 106B/355B | glm4_moe | +| [GLM-4.1V](https://huggingface.co/zai-org) | 9B | glm4v | +| [GLM-4.5/GLM-4.5V](https://huggingface.co/zai-org)* | 106B/355B | glm4_moe/glm4v_moe | | [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - | | [GPT-OSS](https://huggingface.co/openai) | 20B/120B | gpt | | [Granite 3.0-3.3](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 | @@ -303,7 +303,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc | [Phi-4](https://huggingface.co/microsoft) | 14B | phi4 | | [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral | | [Qwen (1-2.5) (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen | -| [Qwen3 (MoE)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/235B | qwen3 | +| [Qwen3 (MoE/Instruct/Thinking)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/235B | qwen3/qwen3_nothink | | [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio | | [Qwen2.5-Omni](https://huggingface.co/Qwen) | 3B/7B | qwen2_omni | | [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl | diff --git a/assets/wechat.jpg b/assets/wechat.jpg index 3a9b4663..0bcc198d 100644 Binary files a/assets/wechat.jpg and b/assets/wechat.jpg differ diff --git a/assets/wechat_npu.jpg b/assets/wechat_npu.jpg index 9eb6d305..7cdcba5d 100644 Binary files a/assets/wechat_npu.jpg and b/assets/wechat_npu.jpg differ diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index b888b6b1..053d313f 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -1047,8 +1047,9 @@ register_template( ) +# copied from glm4 template register_template( - name="glm45v", + name="glm4v_moe", format_user=StringFormatter(slots=["<|user|>\n{{content}}<|assistant|>"]), format_assistant=StringFormatter(slots=["\n{{content}}"]), format_system=StringFormatter(slots=["<|system|>\n{{content}}"]), diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index f252a3a8..5467e772 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -911,12 +911,12 @@ register_model_group( register_model_group( models={ - "GLM-4.5V-Air-Thinking":{ + "GLM-4.5V-Air-Thinking": { DownloadSource.DEFAULT: "zai-org/GLM-4.5V", DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4.5V", } }, - template="glm45v", + template="glm4v_moe", multimodal=True, )