diff --git a/README.md b/README.md
index 0b486c1d..b0f71722 100644
--- a/README.md
+++ b/README.md
@@ -239,11 +239,11 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
 | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
 | [Gemma 3](https://huggingface.co/google) | 1B/4B/12B/27B | gemma3/gemma (1B) |
-| [GLM-4/GLM-4-0414](https://huggingface.co/THUDM) | 9B/32B | glm4 |
+| [GLM-4/GLM-4-0414](https://huggingface.co/THUDM)\* | 9B/32B | glm4 |
 | [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - |
 | [Granite 3.0-3.1](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 |
-| [Index](https://huggingface.co/IndexTeam) | 1.9B | index |
 | [Hunyuan](https://huggingface.co/tencent/) | 7B | hunyuan |
+| [Index](https://huggingface.co/IndexTeam) | 1.9B | index |
 | [InternLM 2-3](https://huggingface.co/internlm) | 7B/8B/20B | intern2 |
 | [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |
 | [Llama 2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 |
@@ -265,9 +265,9 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 | [Phi-3-small](https://huggingface.co/microsoft) | 7B | phi_small |
 | [Phi-4](https://huggingface.co/microsoft) | 14B | phi4 |
 | [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral |
-| [Qwen/QwQ (1-2.5) (Code/Math/MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
+| [Qwen (1-2.5) (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
 | [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio |
-| [Qwen2.5-Omni](https://huggingface.co/Qwen) | 7B | qwen2_omni |
+| [Qwen2.5-Omni](https://huggingface.co/Qwen)\*\* | 7B | qwen2_omni |
 | [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl |
 | [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 |
 | [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
@@ -281,6 +281,10 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 > For the "base" models, the `template` argument can be chosen from `default`, `alpaca`, `vicuna` etc. But make sure to use the **corresponding template** for the "instruct/chat" models.
 >
 > Remember to use the **SAME** template in training and inference.
+>
+> \*: You should install `transformers` from the main branch and set `DISABLE_VERSION_CHECK=1` to skip the version check.
+>
+> \*\*: You need to install a specific version of `transformers` to use the corresponding model.
 
 Please refer to [constants.py](src/llamafactory/extras/constants.py) for a full list of models we supported.
 
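The new GLM-4 footnote relies on an escape hatch in the version checker touched later in this diff. A minimal sketch of how such a `DISABLE_VERSION_CHECK` kill switch around `check_version` can work (only the two names come from this diff; the body below is an assumption for illustration, not LLaMA-Factory's actual implementation):

```python
# Illustrative sketch only: an env-var kill switch around a dependency
# version check. `check_version` and `DISABLE_VERSION_CHECK` appear in this
# diff; this body is an assumption, not the project's real code.
import importlib.metadata
import os

from packaging.requirements import Requirement


def check_version(requirement: str, mandatory: bool = False) -> None:
    """Raise if the installed distribution does not satisfy `requirement`."""
    if os.getenv("DISABLE_VERSION_CHECK", "0").lower() in ("1", "true", "y") and not mandatory:
        return  # user opted out, e.g. to run a transformers build from the main branch

    req = Requirement(requirement)  # e.g. "transformers>=4.41.2,<=4.51.2,!=4.48.0"
    installed = importlib.metadata.version(req.name)
    if not req.specifier.contains(installed, prereleases=True):
        raise RuntimeError(f"{req.name}=={installed} does not satisfy {requirement!r}")
```

A transformers wheel built from main reports a dev version such as `4.52.0.dev0`, which fails the `<=4.51.2` bound; that is why the GLM-4 footnote pairs the main-branch install with `DISABLE_VERSION_CHECK=1`.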
diff --git a/README_zh.md b/README_zh.md
index 9e520e74..0b8a5b50 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -242,7 +242,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
 | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
 | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
 | [Gemma 3](https://huggingface.co/google) | 1B/4B/12B/27B | gemma3/gemma (1B) |
-| [GLM-4/GLM-4-0414](https://huggingface.co/THUDM) | 9B/32B | glm4 |
+| [GLM-4/GLM-4-0414](https://huggingface.co/THUDM)\* | 9B/32B | glm4 |
 | [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - |
 | [Granite 3.0-3.1](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 |
 | [Hunyuan](https://huggingface.co/tencent/) | 7B | hunyuan |
@@ -251,8 +251,8 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
 | [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |
 | [Llama 2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 |
 | [Llama 3-3.3](https://huggingface.co/meta-llama) | 1B/3B/8B/70B | llama3 |
-| [Llama 3.2 Vision](https://huggingface.co/meta-llama) | 11B/90B | mllama |
 | [Llama 4](https://huggingface.co/meta-llama) | 109B/402B | llama4 |
+| [Llama 3.2 Vision](https://huggingface.co/meta-llama) | 11B/90B | mllama |
 | [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | llava |
 | [LLaVA-NeXT](https://huggingface.co/llava-hf) | 7B/8B/13B/34B/72B/110B | llava_next |
 | [LLaVA-NeXT-Video](https://huggingface.co/llava-hf) | 7B/34B | llava_next_video |
@@ -268,9 +268,9 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
 | [Phi-3-small](https://huggingface.co/microsoft) | 7B | phi_small |
 | [Phi-4](https://huggingface.co/microsoft) | 14B | phi4 |
 | [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral |
-| [Qwen/QwQ (1-2.5) (Code/Math/MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
+| [Qwen (1-2.5) (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
 | [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio |
-| [Qwen2.5-Omni](https://huggingface.co/Qwen) | 7B | qwen2_omni |
+| [Qwen2.5-Omni](https://huggingface.co/Qwen)\*\* | 7B | qwen2_omni |
 | [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl |
 | [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 |
 | [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
@@ -284,6 +284,10 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
 > For all "base" (Base) models, the `template` argument can be any of `default`, `alpaca`, `vicuna`, etc. For "chat" (Instruct/Chat) models, make sure to use the **corresponding template**.
 >
 > Remember to use the **same** template for both training and inference.
+>
+> \*: You need to install `transformers` from the main branch and set `DISABLE_VERSION_CHECK=1` to skip the version check.
+>
+> \*\*: You need to install a specific version of `transformers` to use this model.
 
 Please refer to [constants.py](src/llamafactory/extras/constants.py) for the full list of models supported by the project.
 
diff --git a/requirements.txt b/requirements.txt
index e3b60787..fe60a183 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-transformers>=4.41.2,<=4.51.1,!=4.46.*,!=4.47.*,!=4.48.0
+transformers>=4.41.2,<=4.51.2,!=4.46.*,!=4.47.*,!=4.48.0
 datasets>=2.16.0,<=3.5.0
 accelerate>=0.34.0,<=1.6.0
 peft>=0.14.0,<=0.15.1
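The requirement bump above only raises the upper bound from 4.51.1 to 4.51.2; the wildcard exclusions keep the known-bad 4.46.x/4.47.x releases and 4.48.0 blocked. A quick, self-contained way to sanity-check what such a specifier set accepts, using the `packaging` library that pip relies on (the probe versions are arbitrary examples):

```python
from packaging.specifiers import SpecifierSet

# The bumped constraint from requirements.txt.
spec = SpecifierSet(">=4.41.2,<=4.51.2,!=4.46.*,!=4.47.*,!=4.48.0")

for candidate in ["4.41.1", "4.46.3", "4.48.0", "4.48.1", "4.51.1", "4.51.2", "4.52.0"]:
    print(f"{candidate}: {candidate in spec}")
# 4.41.1: False  (below the lower bound)
# 4.46.3: False  (excluded by !=4.46.*)
# 4.48.0: False  (excluded explicitly)
# 4.48.1: True
# 4.51.1: True
# 4.51.2: True   (newly admitted by this diff)
# 4.52.0: False  (above the upper bound)
```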
diff --git a/src/llamafactory/__init__.py b/src/llamafactory/__init__.py
index 879b0396..f72b7198 100644
--- a/src/llamafactory/__init__.py
+++ b/src/llamafactory/__init__.py
@@ -19,7 +19,7 @@ Level:
 
 Dependency graph:
   main:
-    transformers>=4.41.2,<=4.51.1,!=4.46.*,!=4.47.*,!=4.48.0
+    transformers>=4.41.2,<=4.51.2,!=4.46.*,!=4.47.*,!=4.48.0
     datasets>=2.16.0,<=3.5.0
     accelerate>=0.34.0,<=1.6.0
     peft>=0.14.0,<=0.15.1
diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py
index 1c688160..9291968f 100644
--- a/src/llamafactory/extras/constants.py
+++ b/src/llamafactory/extras/constants.py
@@ -146,6 +146,7 @@ def register_model_group(
             any(suffix in name for suffix in ("-Chat", "-Distill", "-Instruct")) or multimodal
         ):
             DEFAULT_TEMPLATE[name] = template
+
         if multimodal:
             MULTIMODAL_SUPPORTED_MODELS.add(name)
 
@@ -736,11 +737,11 @@ register_model_group(
             DownloadSource.DEFAULT: "THUDM/GLM-4-32B-Chat-0414",
             DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-32B-Chat-0414",
         },
-        "GLM-4-Z1-9B-0414": {
+        "GLM-4-Z1-9B-Chat-0414": {
             DownloadSource.DEFAULT: "THUDM/GLM-4-Z1-9B-0414",
             DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-Z1-9B-0414",
         },
-        "GLM-4-Z1-32B-0414": {
+        "GLM-4-Z1-32B-Chat-0414": {
             DownloadSource.DEFAULT: "THUDM/GLM-4-Z1-32B-0414",
             DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-Z1-32B-0414",
         },
diff --git a/src/llamafactory/extras/misc.py b/src/llamafactory/extras/misc.py
index 3c0f11f3..5a650dd2 100644
--- a/src/llamafactory/extras/misc.py
+++ b/src/llamafactory/extras/misc.py
@@ -89,7 +89,7 @@ def check_version(requirement: str, mandatory: bool = False) -> None:
 
 def check_dependencies() -> None:
     r"""Check the version of the required packages."""
-    check_version("transformers>=4.41.2,<=4.51.1,!=4.46.0,!=4.46.1,!=4.46.2,!=4.46.3,!=4.47.0,!=4.47.1,!=4.48.0")
+    check_version("transformers>=4.41.2,<=4.51.2,!=4.46.0,!=4.46.1,!=4.46.2,!=4.46.3,!=4.47.0,!=4.47.1,!=4.48.0")
     check_version("datasets>=2.16.0,<=3.5.0")
     check_version("accelerate>=0.34.0,<=1.6.0")
     check_version("peft>=0.14.0,<=0.15.1")
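One detail worth spelling out: the `GLM-4-Z1-*-0414` to `GLM-4-Z1-*-Chat-0414` renames in constants.py are what make the `glm4` template apply, because `register_model_group` (first hunk of that file) only assigns a default template to names carrying a `-Chat`, `-Distill`, or `-Instruct` suffix, or to multimodal models. A simplified, runnable sketch of that rule; the `if template is not None and (` opening line and the stubbed globals are assumptions based on the visible hunk:

```python
from typing import Optional

DEFAULT_TEMPLATE: dict[str, str] = {}
MULTIMODAL_SUPPORTED_MODELS: set[str] = set()


def register_model_group(models: dict[str, dict], template: Optional[str] = None, multimodal: bool = False) -> None:
    # Simplified from the constants.py hunk: download-source bookkeeping omitted.
    for name in models:
        if template is not None and (
            any(suffix in name for suffix in ("-Chat", "-Distill", "-Instruct")) or multimodal
        ):
            DEFAULT_TEMPLATE[name] = template

        if multimodal:
            MULTIMODAL_SUPPORTED_MODELS.add(name)


register_model_group({"GLM-4-Z1-9B-0414": {}}, template="glm4")       # old key: suffix rule misses it
register_model_group({"GLM-4-Z1-9B-Chat-0414": {}}, template="glm4")  # renamed key: template registered
print(DEFAULT_TEMPLATE)  # {'GLM-4-Z1-9B-Chat-0414': 'glm4'}
```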