[deps] upgrade transformers (#7704)

2025-11-08 06:16:47 +08:00 · 2025-04-13 18:11:34 +08:00 · 2025-04-13 18:11:34 +08:00 · 1fd4d14fbb
commit 1fd4d14fbb
parent 481ecbf9c5
6 changed files with 22 additions and 13 deletions
--- a/README.md
+++ b/README.md
@ -239,11 +239,11 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 | [Falcon](https://huggingface.co/tiiuae)                           | 7B/11B/40B/180B                  | falcon              |
 | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google)          | 2B/7B/9B/27B                     | gemma               |
 | [Gemma 3](https://huggingface.co/google)                          | 1B/4B/12B/27B                    | gemma3/gemma (1B)   |
-| [GLM-4/GLM-4-0414](https://huggingface.co/THUDM)                  | 9B/32B                           | glm4                |
+| [GLM-4/GLM-4-0414](https://huggingface.co/THUDM)\*                | 9B/32B                           | glm4                |
 | [GPT-2](https://huggingface.co/openai-community)                  | 0.1B/0.4B/0.8B/1.5B              | -                   |
 | [Granite 3.0-3.1](https://huggingface.co/ibm-granite)             | 1B/2B/3B/8B                      | granite3            |
-| [Index](https://huggingface.co/IndexTeam)                         | 1.9B                             | index               |
 | [Hunyuan](https://huggingface.co/tencent/)                        | 7B                               | hunyuan             |
+| [Index](https://huggingface.co/IndexTeam)                         | 1.9B                             | index               |
 | [InternLM 2-3](https://huggingface.co/internlm)                   | 7B/8B/20B                        | intern2             |
 | [Llama](https://github.com/facebookresearch/llama)                | 7B/13B/33B/65B                   | -                   |
 | [Llama 2](https://huggingface.co/meta-llama)                      | 7B/13B/70B                       | llama2              |
@ -265,9 +265,9 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 | [Phi-3-small](https://huggingface.co/microsoft)                   | 7B                               | phi_small           |
 | [Phi-4](https://huggingface.co/microsoft)                         | 14B                              | phi4                |
 | [Pixtral](https://huggingface.co/mistralai)                       | 12B                              | pixtral             |
-| [Qwen/QwQ (1-2.5) (Code/Math/MoE)](https://huggingface.co/Qwen)   | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen                |
+| [Qwen (1-2.5) (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen)   | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen                |
 | [Qwen2-Audio](https://huggingface.co/Qwen)                        | 7B                               | qwen2_audio         |
-| [Qwen2.5-Omni](https://huggingface.co/Qwen)                       | 7B                               | qwen2_omni          |
+| [Qwen2.5-Omni](https://huggingface.co/Qwen)\*\*                   | 7B                               | qwen2_omni          |
 | [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen)            | 2B/3B/7B/32B/72B                 | qwen2_vl            |
 | [Skywork o1](https://huggingface.co/Skywork)                      | 8B                               | skywork_o1          |
 | [StarCoder 2](https://huggingface.co/bigcode)                     | 3B/7B/15B                        | -                   |
@ -281,6 +281,10 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 > For the "base" models, the `template` argument can be chosen from `default`, `alpaca`, `vicuna` etc. But make sure to use the **corresponding template** for the "instruct/chat" models.
 >
 > Remember to use the **SAME** template in training and inference.
+>
+> \*: You should install the `transformers` from main branch and use `DISABLE_VERSION_CHECK=1` to skip version check.
+>
+> \*\*: You need to install a specific version of `transformers` to use the corresponding model.
 Please refer to [constants.py](src/llamafactory/extras/constants.py) for a full list of models we supported.
--- a/README_zh.md
+++ b/README_zh.md
@ -242,7 +242,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
 | [Falcon](https://huggingface.co/tiiuae)                           | 7B/11B/40B/180B                  | falcon              |
 | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google)          | 2B/7B/9B/27B                     | gemma               |
 | [Gemma 3](https://huggingface.co/google)                          | 1B/4B/12B/27B                    | gemma3/gemma (1B)   |
-| [GLM-4/GLM-4-0414](https://huggingface.co/THUDM)                  | 9B/32B                           | glm4                |
+| [GLM-4/GLM-4-0414](https://huggingface.co/THUDM)\*                | 9B/32B                           | glm4                |
 | [GPT-2](https://huggingface.co/openai-community)                  | 0.1B/0.4B/0.8B/1.5B              | -                   |
 | [Granite 3.0-3.1](https://huggingface.co/ibm-granite)             | 1B/2B/3B/8B                      | granite3            |
 | [Hunyuan](https://huggingface.co/tencent/)                        | 7B                               | hunyuan             |
@ -251,8 +251,8 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
 | [Llama](https://github.com/facebookresearch/llama)                | 7B/13B/33B/65B                   | -                   |
 | [Llama 2](https://huggingface.co/meta-llama)                      | 7B/13B/70B                       | llama2              |
 | [Llama 3-3.3](https://huggingface.co/meta-llama)                  | 1B/3B/8B/70B                     | llama3              |
-| [Llama 3.2 Vision](https://huggingface.co/meta-llama)             | 11B/90B                          | mllama              |
 | [Llama 4](https://huggingface.co/meta-llama)                      | 109B/402B                        | llama4              |
+| [Llama 3.2 Vision](https://huggingface.co/meta-llama)             | 11B/90B                          | mllama              |
 | [LLaVA-1.5](https://huggingface.co/llava-hf)                      | 7B/13B                           | llava               |
 | [LLaVA-NeXT](https://huggingface.co/llava-hf)                     | 7B/8B/13B/34B/72B/110B           | llava_next          |
 | [LLaVA-NeXT-Video](https://huggingface.co/llava-hf)               | 7B/34B                           | llava_next_video    |
@ -268,9 +268,9 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
 | [Phi-3-small](https://huggingface.co/microsoft)                   | 7B                               | phi_small           |
 | [Phi-4](https://huggingface.co/microsoft)                         | 14B                              | phi4                |
 | [Pixtral](https://huggingface.co/mistralai)                       | 12B                              | pixtral             |
-| [Qwen/QwQ (1-2.5) (Code/Math/MoE)](https://huggingface.co/Qwen)   | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen                |
+| [Qwen (1-2.5) (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen)   | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen                |
 | [Qwen2-Audio](https://huggingface.co/Qwen)                        | 7B                               | qwen2_audio         |
-| [Qwen2.5-Omni](https://huggingface.co/Qwen)                       | 7B                               | qwen2_omni          |
+| [Qwen2.5-Omni](https://huggingface.co/Qwen)\*\*                   | 7B                               | qwen2_omni          |
 | [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen)            | 2B/3B/7B/32B/72B                 | qwen2_vl            |
 | [Skywork o1](https://huggingface.co/Skywork)                      | 8B                               | skywork_o1          |
 | [StarCoder 2](https://huggingface.co/bigcode)                     | 3B/7B/15B                        | -                   |
@ -284,6 +284,10 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
 > 对于所有“基座”（Base）模型，`template` 参数可以是 `default`, `alpaca`, `vicuna` 等任意值。但“对话”（Instruct/Chat）模型请务必使用**对应的模板**。
 >
 > 请务必在训练和推理时采用**完全一致**的模板。
+>
+> \*：您需要从 main 分支安装 `transformers` 并使用 `DISABLE_VERSION_CHECK=1` 来跳过版本检查。
+>
+> \*\*：您需要安装特定版本的 `transformers` 以使用该模型。
 项目所支持模型的完整列表请参阅 [constants.py](src/llamafactory/extras/constants.py)。
--- a/requirements.txt
+++ b/requirements.txt
@ -1,4 +1,4 @@
-transformers>=4.41.2,<=4.51.1,!=4.46.*,!=4.47.*,!=4.48.0
+transformers>=4.41.2,<=4.51.2,!=4.46.*,!=4.47.*,!=4.48.0
 datasets>=2.16.0,<=3.5.0
 accelerate>=0.34.0,<=1.6.0
 peft>=0.14.0,<=0.15.1
--- a/src/llamafactory/__init__.py
+++ b/src/llamafactory/__init__.py
@ -19,7 +19,7 @@ Level:
 Dependency graph:
  main:
-    transformers>=4.41.2,<=4.51.1,!=4.46.*,!=4.47.*,!=4.48.0
+    transformers>=4.41.2,<=4.51.2,!=4.46.*,!=4.47.*,!=4.48.0
    datasets>=2.16.0,<=3.5.0
    accelerate>=0.34.0,<=1.6.0
    peft>=0.14.0,<=0.15.1
--- a/src/llamafactory/extras/constants.py
+++ b/src/llamafactory/extras/constants.py
@ -146,6 +146,7 @@ def register_model_group(
            any(suffix in name for suffix in ("-Chat", "-Distill", "-Instruct")) or multimodal
        ):
            DEFAULT_TEMPLATE[name] = template
        if multimodal:
            MULTIMODAL_SUPPORTED_MODELS.add(name)
@ -736,11 +737,11 @@ register_model_group(
            DownloadSource.DEFAULT: "THUDM/GLM-4-32B-Chat-0414",
            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-32B-Chat-0414" ,
        },
-        "GLM-4-Z1-9B-0414": {
+        "GLM-4-Z1-9B-Chat-0414": {
            DownloadSource.DEFAULT: "THUDM/GLM-4-Z1-9B-0414",
            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-Z1-9B-0414" ,
        },
-        "GLM-4-Z1-32B-0414": {
+        "GLM-4-Z1-32B-Chat-0414": {
            DownloadSource.DEFAULT: "THUDM/GLM-4-Z1-32B-0414",
            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-Z1-32B-0414" ,
        },
--- a/src/llamafactory/extras/misc.py
+++ b/src/llamafactory/extras/misc.py
@ -89,7 +89,7 @@ def check_version(requirement: str, mandatory: bool = False) -> None:
 def check_dependencies() -> None:
    r"""Check the version of the required packages."""
-    check_version("transformers>=4.41.2,<=4.51.1,!=4.46.0,!=4.46.1,!=4.46.2,!=4.46.3,!=4.47.0,!=4.47.1,!=4.48.0")
+    check_version("transformers>=4.41.2,<=4.51.2,!=4.46.0,!=4.46.1,!=4.46.2,!=4.46.3,!=4.47.0,!=4.47.1,!=4.48.0")
    check_version("datasets>=2.16.0,<=3.5.0")
    check_version("accelerate>=0.34.0,<=1.6.0")
    check_version("peft>=0.14.0,<=0.15.1")