mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-08-02 03:32:50 +08:00
[deps] upgrade transformers (#7704)
This commit is contained in:
parent
481ecbf9c5
commit
1fd4d14fbb
12
README.md
12
README.md
@ -239,11 +239,11 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
|
|||||||
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
|
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
|
||||||
| [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
|
| [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
|
||||||
| [Gemma 3](https://huggingface.co/google) | 1B/4B/12B/27B | gemma3/gemma (1B) |
|
| [Gemma 3](https://huggingface.co/google) | 1B/4B/12B/27B | gemma3/gemma (1B) |
|
||||||
| [GLM-4/GLM-4-0414](https://huggingface.co/THUDM) | 9B/32B | glm4 |
|
| [GLM-4/GLM-4-0414](https://huggingface.co/THUDM)\* | 9B/32B | glm4 |
|
||||||
| [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - |
|
| [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - |
|
||||||
| [Granite 3.0-3.1](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 |
|
| [Granite 3.0-3.1](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 |
|
||||||
| [Index](https://huggingface.co/IndexTeam) | 1.9B | index |
|
|
||||||
| [Hunyuan](https://huggingface.co/tencent/) | 7B | hunyuan |
|
| [Hunyuan](https://huggingface.co/tencent/) | 7B | hunyuan |
|
||||||
|
| [Index](https://huggingface.co/IndexTeam) | 1.9B | index |
|
||||||
| [InternLM 2-3](https://huggingface.co/internlm) | 7B/8B/20B | intern2 |
|
| [InternLM 2-3](https://huggingface.co/internlm) | 7B/8B/20B | intern2 |
|
||||||
| [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |
|
| [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |
|
||||||
| [Llama 2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 |
|
| [Llama 2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 |
|
||||||
@ -265,9 +265,9 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
|
|||||||
| [Phi-3-small](https://huggingface.co/microsoft) | 7B | phi_small |
|
| [Phi-3-small](https://huggingface.co/microsoft) | 7B | phi_small |
|
||||||
| [Phi-4](https://huggingface.co/microsoft) | 14B | phi4 |
|
| [Phi-4](https://huggingface.co/microsoft) | 14B | phi4 |
|
||||||
| [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral |
|
| [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral |
|
||||||
| [Qwen/QwQ (1-2.5) (Code/Math/MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
|
| [Qwen (1-2.5) (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
|
||||||
| [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio |
|
| [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio |
|
||||||
| [Qwen2.5-Omni](https://huggingface.co/Qwen) | 7B | qwen2_omni |
|
| [Qwen2.5-Omni](https://huggingface.co/Qwen)\*\* | 7B | qwen2_omni |
|
||||||
| [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl |
|
| [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl |
|
||||||
| [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 |
|
| [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 |
|
||||||
| [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
|
| [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
|
||||||
@ -281,6 +281,10 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
|
|||||||
> For the "base" models, the `template` argument can be chosen from `default`, `alpaca`, `vicuna` etc. But make sure to use the **corresponding template** for the "instruct/chat" models.
|
> For the "base" models, the `template` argument can be chosen from `default`, `alpaca`, `vicuna` etc. But make sure to use the **corresponding template** for the "instruct/chat" models.
|
||||||
>
|
>
|
||||||
> Remember to use the **SAME** template in training and inference.
|
> Remember to use the **SAME** template in training and inference.
|
||||||
|
>
|
||||||
|
> \*: You should install `transformers` from the main branch and use `DISABLE_VERSION_CHECK=1` to skip the version check.
|
||||||
|
>
|
||||||
|
> \*\*: You need to install a specific version of `transformers` to use the corresponding model.
|
||||||
|
|
||||||
Please refer to [constants.py](src/llamafactory/extras/constants.py) for a full list of models we support.
|
Please refer to [constants.py](src/llamafactory/extras/constants.py) for a full list of models we support.
|
||||||
|
|
||||||
|
12
README_zh.md
12
README_zh.md
@ -242,7 +242,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
|
|||||||
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
|
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
|
||||||
| [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
|
| [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
|
||||||
| [Gemma 3](https://huggingface.co/google) | 1B/4B/12B/27B | gemma3/gemma (1B) |
|
| [Gemma 3](https://huggingface.co/google) | 1B/4B/12B/27B | gemma3/gemma (1B) |
|
||||||
| [GLM-4/GLM-4-0414](https://huggingface.co/THUDM) | 9B/32B | glm4 |
|
| [GLM-4/GLM-4-0414](https://huggingface.co/THUDM)\* | 9B/32B | glm4 |
|
||||||
| [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - |
|
| [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - |
|
||||||
| [Granite 3.0-3.1](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 |
|
| [Granite 3.0-3.1](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 |
|
||||||
| [Hunyuan](https://huggingface.co/tencent/) | 7B | hunyuan |
|
| [Hunyuan](https://huggingface.co/tencent/) | 7B | hunyuan |
|
||||||
@ -251,8 +251,8 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
|
|||||||
| [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |
|
| [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |
|
||||||
| [Llama 2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 |
|
| [Llama 2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 |
|
||||||
| [Llama 3-3.3](https://huggingface.co/meta-llama) | 1B/3B/8B/70B | llama3 |
|
| [Llama 3-3.3](https://huggingface.co/meta-llama) | 1B/3B/8B/70B | llama3 |
|
||||||
| [Llama 3.2 Vision](https://huggingface.co/meta-llama) | 11B/90B | mllama |
|
|
||||||
| [Llama 4](https://huggingface.co/meta-llama) | 109B/402B | llama4 |
|
| [Llama 4](https://huggingface.co/meta-llama) | 109B/402B | llama4 |
|
||||||
|
| [Llama 3.2 Vision](https://huggingface.co/meta-llama) | 11B/90B | mllama |
|
||||||
| [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | llava |
|
| [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | llava |
|
||||||
| [LLaVA-NeXT](https://huggingface.co/llava-hf) | 7B/8B/13B/34B/72B/110B | llava_next |
|
| [LLaVA-NeXT](https://huggingface.co/llava-hf) | 7B/8B/13B/34B/72B/110B | llava_next |
|
||||||
| [LLaVA-NeXT-Video](https://huggingface.co/llava-hf) | 7B/34B | llava_next_video |
|
| [LLaVA-NeXT-Video](https://huggingface.co/llava-hf) | 7B/34B | llava_next_video |
|
||||||
@ -268,9 +268,9 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
|
|||||||
| [Phi-3-small](https://huggingface.co/microsoft) | 7B | phi_small |
|
| [Phi-3-small](https://huggingface.co/microsoft) | 7B | phi_small |
|
||||||
| [Phi-4](https://huggingface.co/microsoft) | 14B | phi4 |
|
| [Phi-4](https://huggingface.co/microsoft) | 14B | phi4 |
|
||||||
| [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral |
|
| [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral |
|
||||||
| [Qwen/QwQ (1-2.5) (Code/Math/MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
|
| [Qwen (1-2.5) (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
|
||||||
| [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio |
|
| [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio |
|
||||||
| [Qwen2.5-Omni](https://huggingface.co/Qwen) | 7B | qwen2_omni |
|
| [Qwen2.5-Omni](https://huggingface.co/Qwen)\*\* | 7B | qwen2_omni |
|
||||||
| [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl |
|
| [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl |
|
||||||
| [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 |
|
| [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 |
|
||||||
| [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
|
| [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
|
||||||
@ -284,6 +284,10 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
|
|||||||
> 对于所有“基座”(Base)模型,`template` 参数可以是 `default`, `alpaca`, `vicuna` 等任意值。但“对话”(Instruct/Chat)模型请务必使用**对应的模板**。
|
> 对于所有“基座”(Base)模型,`template` 参数可以是 `default`, `alpaca`, `vicuna` 等任意值。但“对话”(Instruct/Chat)模型请务必使用**对应的模板**。
|
||||||
>
|
>
|
||||||
> 请务必在训练和推理时采用**完全一致**的模板。
|
> 请务必在训练和推理时采用**完全一致**的模板。
|
||||||
|
>
|
||||||
|
> \*:您需要从 main 分支安装 `transformers` 并使用 `DISABLE_VERSION_CHECK=1` 来跳过版本检查。
|
||||||
|
>
|
||||||
|
> \*\*:您需要安装特定版本的 `transformers` 以使用该模型。
|
||||||
|
|
||||||
项目所支持模型的完整列表请参阅 [constants.py](src/llamafactory/extras/constants.py)。
|
项目所支持模型的完整列表请参阅 [constants.py](src/llamafactory/extras/constants.py)。
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
transformers>=4.41.2,<=4.51.1,!=4.46.*,!=4.47.*,!=4.48.0
|
transformers>=4.41.2,<=4.51.2,!=4.46.*,!=4.47.*,!=4.48.0
|
||||||
datasets>=2.16.0,<=3.5.0
|
datasets>=2.16.0,<=3.5.0
|
||||||
accelerate>=0.34.0,<=1.6.0
|
accelerate>=0.34.0,<=1.6.0
|
||||||
peft>=0.14.0,<=0.15.1
|
peft>=0.14.0,<=0.15.1
|
||||||
|
@ -19,7 +19,7 @@ Level:
|
|||||||
|
|
||||||
Dependency graph:
|
Dependency graph:
|
||||||
main:
|
main:
|
||||||
transformers>=4.41.2,<=4.51.1,!=4.46.*,!=4.47.*,!=4.48.0
|
transformers>=4.41.2,<=4.51.2,!=4.46.*,!=4.47.*,!=4.48.0
|
||||||
datasets>=2.16.0,<=3.5.0
|
datasets>=2.16.0,<=3.5.0
|
||||||
accelerate>=0.34.0,<=1.6.0
|
accelerate>=0.34.0,<=1.6.0
|
||||||
peft>=0.14.0,<=0.15.1
|
peft>=0.14.0,<=0.15.1
|
||||||
|
@ -146,6 +146,7 @@ def register_model_group(
|
|||||||
any(suffix in name for suffix in ("-Chat", "-Distill", "-Instruct")) or multimodal
|
any(suffix in name for suffix in ("-Chat", "-Distill", "-Instruct")) or multimodal
|
||||||
):
|
):
|
||||||
DEFAULT_TEMPLATE[name] = template
|
DEFAULT_TEMPLATE[name] = template
|
||||||
|
|
||||||
if multimodal:
|
if multimodal:
|
||||||
MULTIMODAL_SUPPORTED_MODELS.add(name)
|
MULTIMODAL_SUPPORTED_MODELS.add(name)
|
||||||
|
|
||||||
@ -736,11 +737,11 @@ register_model_group(
|
|||||||
DownloadSource.DEFAULT: "THUDM/GLM-4-32B-Chat-0414",
|
DownloadSource.DEFAULT: "THUDM/GLM-4-32B-Chat-0414",
|
||||||
DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-32B-Chat-0414" ,
|
DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-32B-Chat-0414" ,
|
||||||
},
|
},
|
||||||
"GLM-4-Z1-9B-0414": {
|
"GLM-4-Z1-9B-Chat-0414": {
|
||||||
DownloadSource.DEFAULT: "THUDM/GLM-4-Z1-9B-0414",
|
DownloadSource.DEFAULT: "THUDM/GLM-4-Z1-9B-0414",
|
||||||
DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-Z1-9B-0414" ,
|
DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-Z1-9B-0414" ,
|
||||||
},
|
},
|
||||||
"GLM-4-Z1-32B-0414": {
|
"GLM-4-Z1-32B-Chat-0414": {
|
||||||
DownloadSource.DEFAULT: "THUDM/GLM-4-Z1-32B-0414",
|
DownloadSource.DEFAULT: "THUDM/GLM-4-Z1-32B-0414",
|
||||||
DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-Z1-32B-0414" ,
|
DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-Z1-32B-0414" ,
|
||||||
},
|
},
|
||||||
|
@ -89,7 +89,7 @@ def check_version(requirement: str, mandatory: bool = False) -> None:
|
|||||||
|
|
||||||
def check_dependencies() -> None:
|
def check_dependencies() -> None:
|
||||||
r"""Check the version of the required packages."""
|
r"""Check the version of the required packages."""
|
||||||
check_version("transformers>=4.41.2,<=4.51.1,!=4.46.0,!=4.46.1,!=4.46.2,!=4.46.3,!=4.47.0,!=4.47.1,!=4.48.0")
|
check_version("transformers>=4.41.2,<=4.51.2,!=4.46.0,!=4.46.1,!=4.46.2,!=4.46.3,!=4.47.0,!=4.47.1,!=4.48.0")
|
||||||
check_version("datasets>=2.16.0,<=3.5.0")
|
check_version("datasets>=2.16.0,<=3.5.0")
|
||||||
check_version("accelerate>=0.34.0,<=1.6.0")
|
check_version("accelerate>=0.34.0,<=1.6.0")
|
||||||
check_version("peft>=0.14.0,<=0.15.1")
|
check_version("peft>=0.14.0,<=0.15.1")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user