[assets] update model readme (#7724)

2025-12-16 03:40:34 +08:00 · 2025-04-15 00:41:09 +08:00
parent df8752e8ee
commit ac8c6fdd3a
6 changed files with 25 additions and 19 deletions
--- a/README.md
+++ b/README.md
@@ -107,6 +107,8 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/

 ## Changelog

+[25/04/14] We supported fine-tuning the **[GLM-Z1](https://huggingface.co/THUDM/GLM-Z1-9B-0414)** and **[Kimi-VL](https://huggingface.co/moonshotai/Kimi-VL-A3B-Instruct)** models.
+
 [25/04/06] We supported fine-tuning the **[Llama 4](https://ai.meta.com/blog/llama-4-multimodal-intelligence/)** model. See [PR #7611](https://github.com/hiyouga/LLaMA-Factory/pull/7611) to get started.

 [25/03/31] We supported fine-tuning the **[Qwen2.5 Omni](https://qwenlm.github.io/blog/qwen2.5-omni/)** model. See [PR #7537](https://github.com/hiyouga/LLaMA-Factory/pull/7537) to get started.
@@ -123,7 +125,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/

 [25/02/05] We supported fine-tuning the **[Qwen2-Audio](Qwen/Qwen2-Audio-7B-Instruct)** and **[MiniCPM-o-2.6](https://huggingface.co/openbmb/MiniCPM-o-2_6)** on audio understanding tasks.

-[25/01/31] We supported fine-tuning the **[DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1)** and **[Qwen2.5-VL](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct)** model.
+[25/01/31] We supported fine-tuning the **[DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1)** and **[Qwen2.5-VL](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct)** models.

 [25/01/15] We supported **[APOLLO](https://arxiv.org/abs/2412.05270)** optimizer. See [examples](examples/README.md) for usage.

@@ -239,12 +241,13 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 | [Falcon](https://huggingface.co/tiiuae)                           | 7B/11B/40B/180B                  | falcon              |
 | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google)          | 2B/7B/9B/27B                     | gemma               |
 | [Gemma 3](https://huggingface.co/google)                          | 1B/4B/12B/27B                    | gemma3/gemma (1B)   |
-| [GLM-4/GLM-4-0414](https://huggingface.co/THUDM)\*                | 9B/32B                           | glm4                |
+| [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/THUDM)           | 9B/32B                           | glm4                |
 | [GPT-2](https://huggingface.co/openai-community)                  | 0.1B/0.4B/0.8B/1.5B              | -                   |
 | [Granite 3.0-3.1](https://huggingface.co/ibm-granite)             | 1B/2B/3B/8B                      | granite3            |
 | [Hunyuan](https://huggingface.co/tencent/)                        | 7B                               | hunyuan             |
 | [Index](https://huggingface.co/IndexTeam)                         | 1.9B                             | index               |
 | [InternLM 2-3](https://huggingface.co/internlm)                   | 7B/8B/20B                        | intern2             |
+| [Kimi-VL](https://huggingface.co/moonshotai)                      | 16B                              | kimi_vl             |
 | [Llama](https://github.com/facebookresearch/llama)                | 7B/13B/33B/65B                   | -                   |
 | [Llama 2](https://huggingface.co/meta-llama)                      | 7B/13B/70B                       | llama2              |
 | [Llama 3-3.3](https://huggingface.co/meta-llama)                  | 1B/3B/8B/70B                     | llama3              |
--- a/README_zh.md
+++ b/README_zh.md
@@ -110,6 +110,8 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc

 ## 更新日志

+[25/04/14] 我们支持了 **[GLM-Z1](https://huggingface.co/THUDM/GLM-Z1-9B-0414)** 和 **[Kimi-VL](https://huggingface.co/moonshotai/Kimi-VL-A3B-Instruct)** 模型的微调。
+
 [25/04/06] 我们支持了 **[Llama 4](https://ai.meta.com/blog/llama-4-multimodal-intelligence/)** 模型的微调。查看 [PR #7611](https://github.com/hiyouga/LLaMA-Factory/pull/7611) 以使用。

 [25/03/31] 我们支持了 **[Qwen2.5 Omni](https://qwenlm.github.io/blog/qwen2.5-omni/)** 模型的微调。查看 [PR #7537](https://github.com/hiyouga/LLaMA-Factory/pull/7537) 以使用。
@@ -242,12 +244,13 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
 | [Falcon](https://huggingface.co/tiiuae)                           | 7B/11B/40B/180B                  | falcon              |
 | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google)          | 2B/7B/9B/27B                     | gemma               |
 | [Gemma 3](https://huggingface.co/google)                          | 1B/4B/12B/27B                    | gemma3/gemma (1B)   |
-| [GLM-4/GLM-4-0414](https://huggingface.co/THUDM)\*                | 9B/32B                           | glm4                |
+| [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/THUDM)           | 9B/32B                           | glm4                |
 | [GPT-2](https://huggingface.co/openai-community)                  | 0.1B/0.4B/0.8B/1.5B              | -                   |
 | [Granite 3.0-3.1](https://huggingface.co/ibm-granite)             | 1B/2B/3B/8B                      | granite3            |
 | [Hunyuan](https://huggingface.co/tencent/)                        | 7B                               | hunyuan             |
 | [Index](https://huggingface.co/IndexTeam)                         | 1.9B                             | index               |
 | [InternLM 2-3](https://huggingface.co/internlm)                   | 7B/8B/20B                        | intern2             |
+| [Kimi-VL](https://huggingface.co/moonshotai)                      | 16B                              | kimi_vl             |
 | [Llama](https://github.com/facebookresearch/llama)                | 7B/13B/33B/65B                   | -                   |
 | [Llama 2](https://huggingface.co/meta-llama)                      | 7B/13B/70B                       | llama2              |
 | [Llama 3-3.3](https://huggingface.co/meta-llama)                  | 1B/3B/8B/70B                     | llama3              |
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-transformers>=4.41.2,<=4.51.2,!=4.46.*,!=4.47.*,!=4.48.0
+transformers>=4.41.2,<=4.51.3,!=4.46.*,!=4.47.*,!=4.48.0
 datasets>=2.16.0,<=3.5.0
 accelerate>=0.34.0,<=1.6.0
 peft>=0.14.0,<=0.15.1
--- a/src/llamafactory/init.py
+++ b/src/llamafactory/init.py
@@ -19,7 +19,7 @@ Level:

 Dependency graph:
  main:
-    transformers>=4.41.2,<=4.51.2,!=4.46.*,!=4.47.*,!=4.48.0
+    transformers>=4.41.2,<=4.51.3,!=4.46.*,!=4.47.*,!=4.48.0
    datasets>=2.16.0,<=3.5.0
    accelerate>=0.34.0,<=1.6.0
    peft>=0.14.0,<=0.15.1
--- a/src/llamafactory/extras/constants.py
+++ b/src/llamafactory/extras/constants.py
@@ -725,25 +725,25 @@ register_model_group(
            DownloadSource.DEFAULT: "THUDM/glm-4-9b-chat-1m",
            DownloadSource.MODELSCOPE: "ZhipuAI/glm-4-9b-chat-1m",
        },
-        "GLM-4-9B-Chat-0414": {
-            DownloadSource.DEFAULT: "THUDM/GLM-4-9B-Chat-0414",
-            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-9B-Chat-0414",
+        "GLM-4-9B-0414-Chat": {
+            DownloadSource.DEFAULT: "THUDM/GLM-4-9B-0414",
+            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-9B-0414",
        },
        "GLM-4-32B-0414": {
+            DownloadSource.DEFAULT: "THUDM/GLM-4-32B-Base-0414",
+            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-32B-Base-0414",
+        },
+        "GLM-4-32B-0414-Chat": {
            DownloadSource.DEFAULT: "THUDM/GLM-4-32B-0414",
            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-32B-0414",
        },
-        "GLM-4-32B-Chat-0414": {
-            DownloadSource.DEFAULT: "THUDM/GLM-4-32B-Chat-0414",
-            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-32B-Chat-0414",
+        "GLM-Z1-9B-0414-Chat": {
+            DownloadSource.DEFAULT: "THUDM/GLM-Z1-9B-0414",
+            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-Z1-9B-0414",
        },
-        "GLM-4-Z1-9B-Chat-0414": {
-            DownloadSource.DEFAULT: "THUDM/GLM-4-Z1-9B-0414",
-            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-Z1-9B-0414",
-        },
-        "GLM-4-Z1-32B-Chat-0414": {
-            DownloadSource.DEFAULT: "THUDM/GLM-4-Z1-32B-0414",
-            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-Z1-32B-0414",
+        "GLM-Z1-32B-0414-Chat": {
+            DownloadSource.DEFAULT: "THUDM/GLM-Z1-32B-0414",
+            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-Z1-32B-0414",
        },
    },
    template="glm4",
--- a/src/llamafactory/extras/misc.py
+++ b/src/llamafactory/extras/misc.py
@@ -89,7 +89,7 @@ def check_version(requirement: str, mandatory: bool = False) -> None:

 def check_dependencies() -> None:
    r"""Check the version of the required packages."""
-    check_version("transformers>=4.41.2,<=4.51.2,!=4.46.0,!=4.46.1,!=4.46.2,!=4.46.3,!=4.47.0,!=4.47.1,!=4.48.0")
+    check_version("transformers>=4.41.2,<=4.51.3,!=4.46.0,!=4.46.1,!=4.46.2,!=4.46.3,!=4.47.0,!=4.47.1,!=4.48.0")
    check_version("datasets>=2.16.0,<=3.5.0")
    check_version("accelerate>=0.34.0,<=1.6.0")
    check_version("peft>=0.14.0,<=0.15.1")