mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-08-01 11:12:50 +08:00
[assets] update model readme (#7724)
This commit is contained in:
parent
df8752e8ee
commit
ac8c6fdd3a
@ -107,6 +107,8 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
|
||||
|
||||
## Changelog
|
||||
|
||||
[25/04/14] We supported fine-tuning the **[GLM-Z1](https://huggingface.co/THUDM/GLM-Z1-9B-0414)** and **[Kimi-VL](https://huggingface.co/moonshotai/Kimi-VL-A3B-Instruct)** models.
|
||||
|
||||
[25/04/06] We supported fine-tuning the **[Llama 4](https://ai.meta.com/blog/llama-4-multimodal-intelligence/)** model. See [PR #7611](https://github.com/hiyouga/LLaMA-Factory/pull/7611) to get started.
|
||||
|
||||
[25/03/31] We supported fine-tuning the **[Qwen2.5 Omni](https://qwenlm.github.io/blog/qwen2.5-omni/)** model. See [PR #7537](https://github.com/hiyouga/LLaMA-Factory/pull/7537) to get started.
|
||||
@ -123,7 +125,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
|
||||
|
||||
[25/02/05] We supported fine-tuning the **[Qwen2-Audio](https://huggingface.co/Qwen/Qwen2-Audio-7B-Instruct)** and **[MiniCPM-o-2.6](https://huggingface.co/openbmb/MiniCPM-o-2_6)** models on audio understanding tasks.
|
||||
|
||||
[25/01/31] We supported fine-tuning the **[DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1)** and **[Qwen2.5-VL](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct)** model.
|
||||
[25/01/31] We supported fine-tuning the **[DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1)** and **[Qwen2.5-VL](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct)** models.
|
||||
|
||||
[25/01/15] We supported the **[APOLLO](https://arxiv.org/abs/2412.05270)** optimizer. See [examples](examples/README.md) for usage.
|
||||
|
||||
@ -239,12 +241,13 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
|
||||
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
|
||||
| [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
|
||||
| [Gemma 3](https://huggingface.co/google) | 1B/4B/12B/27B | gemma3/gemma (1B) |
|
||||
| [GLM-4/GLM-4-0414](https://huggingface.co/THUDM)\* | 9B/32B | glm4 |
|
||||
| [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/THUDM) | 9B/32B | glm4 |
|
||||
| [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - |
|
||||
| [Granite 3.0-3.1](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 |
|
||||
| [Hunyuan](https://huggingface.co/tencent/) | 7B | hunyuan |
|
||||
| [Index](https://huggingface.co/IndexTeam) | 1.9B | index |
|
||||
| [InternLM 2-3](https://huggingface.co/internlm) | 7B/8B/20B | intern2 |
|
||||
| [Kimi-VL](https://huggingface.co/moonshotai) | 16B | kimi_vl |
|
||||
| [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |
|
||||
| [Llama 2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 |
|
||||
| [Llama 3-3.3](https://huggingface.co/meta-llama) | 1B/3B/8B/70B | llama3 |
|
||||
|
@ -110,6 +110,8 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
|
||||
|
||||
## 更新日志
|
||||
|
||||
[25/04/14] 我们支持了 **[GLM-Z1](https://huggingface.co/THUDM/GLM-Z1-9B-0414)** 和 **[Kimi-VL](https://huggingface.co/moonshotai/Kimi-VL-A3B-Instruct)** 模型的微调。
|
||||
|
||||
[25/04/06] 我们支持了 **[Llama 4](https://ai.meta.com/blog/llama-4-multimodal-intelligence/)** 模型的微调。查看 [PR #7611](https://github.com/hiyouga/LLaMA-Factory/pull/7611) 以使用。
|
||||
|
||||
[25/03/31] 我们支持了 **[Qwen2.5 Omni](https://qwenlm.github.io/blog/qwen2.5-omni/)** 模型的微调。查看 [PR #7537](https://github.com/hiyouga/LLaMA-Factory/pull/7537) 以使用。
|
||||
@ -242,12 +244,13 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
|
||||
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
|
||||
| [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
|
||||
| [Gemma 3](https://huggingface.co/google) | 1B/4B/12B/27B | gemma3/gemma (1B) |
|
||||
| [GLM-4/GLM-4-0414](https://huggingface.co/THUDM)\* | 9B/32B | glm4 |
|
||||
| [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/THUDM) | 9B/32B | glm4 |
|
||||
| [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - |
|
||||
| [Granite 3.0-3.1](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 |
|
||||
| [Hunyuan](https://huggingface.co/tencent/) | 7B | hunyuan |
|
||||
| [Index](https://huggingface.co/IndexTeam) | 1.9B | index |
|
||||
| [InternLM 2-3](https://huggingface.co/internlm) | 7B/8B/20B | intern2 |
|
||||
| [Kimi-VL](https://huggingface.co/moonshotai) | 16B | kimi_vl |
|
||||
| [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |
|
||||
| [Llama 2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 |
|
||||
| [Llama 3-3.3](https://huggingface.co/meta-llama) | 1B/3B/8B/70B | llama3 |
|
||||
|
@ -1,4 +1,4 @@
|
||||
transformers>=4.41.2,<=4.51.2,!=4.46.*,!=4.47.*,!=4.48.0
|
||||
transformers>=4.41.2,<=4.51.3,!=4.46.*,!=4.47.*,!=4.48.0
|
||||
datasets>=2.16.0,<=3.5.0
|
||||
accelerate>=0.34.0,<=1.6.0
|
||||
peft>=0.14.0,<=0.15.1
|
||||
|
@ -19,7 +19,7 @@ Level:
|
||||
|
||||
Dependency graph:
|
||||
main:
|
||||
transformers>=4.41.2,<=4.51.2,!=4.46.*,!=4.47.*,!=4.48.0
|
||||
transformers>=4.41.2,<=4.51.3,!=4.46.*,!=4.47.*,!=4.48.0
|
||||
datasets>=2.16.0,<=3.5.0
|
||||
accelerate>=0.34.0,<=1.6.0
|
||||
peft>=0.14.0,<=0.15.1
|
||||
|
@ -725,25 +725,25 @@ register_model_group(
|
||||
DownloadSource.DEFAULT: "THUDM/glm-4-9b-chat-1m",
|
||||
DownloadSource.MODELSCOPE: "ZhipuAI/glm-4-9b-chat-1m",
|
||||
},
|
||||
"GLM-4-9B-Chat-0414": {
|
||||
DownloadSource.DEFAULT: "THUDM/GLM-4-9B-Chat-0414",
|
||||
DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-9B-Chat-0414",
|
||||
"GLM-4-9B-0414-Chat": {
|
||||
DownloadSource.DEFAULT: "THUDM/GLM-4-9B-0414",
|
||||
DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-9B-0414",
|
||||
},
|
||||
"GLM-4-32B-0414": {
|
||||
DownloadSource.DEFAULT: "THUDM/GLM-4-32B-Base-0414",
|
||||
DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-32B-Base-0414",
|
||||
},
|
||||
"GLM-4-32B-0414-Chat": {
|
||||
DownloadSource.DEFAULT: "THUDM/GLM-4-32B-0414",
|
||||
DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-32B-0414",
|
||||
},
|
||||
"GLM-4-32B-Chat-0414": {
|
||||
DownloadSource.DEFAULT: "THUDM/GLM-4-32B-Chat-0414",
|
||||
DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-32B-Chat-0414",
|
||||
"GLM-Z1-9B-0414-Chat": {
|
||||
DownloadSource.DEFAULT: "THUDM/GLM-Z1-9B-0414",
|
||||
DownloadSource.MODELSCOPE: "ZhipuAI/GLM-Z1-9B-0414",
|
||||
},
|
||||
"GLM-4-Z1-9B-Chat-0414": {
|
||||
DownloadSource.DEFAULT: "THUDM/GLM-4-Z1-9B-0414",
|
||||
DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-Z1-9B-0414",
|
||||
},
|
||||
"GLM-4-Z1-32B-Chat-0414": {
|
||||
DownloadSource.DEFAULT: "THUDM/GLM-4-Z1-32B-0414",
|
||||
DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-Z1-32B-0414",
|
||||
"GLM-Z1-32B-0414-Chat": {
|
||||
DownloadSource.DEFAULT: "THUDM/GLM-Z1-32B-0414",
|
||||
DownloadSource.MODELSCOPE: "ZhipuAI/GLM-Z1-32B-0414",
|
||||
},
|
||||
},
|
||||
template="glm4",
|
||||
|
@ -89,7 +89,7 @@ def check_version(requirement: str, mandatory: bool = False) -> None:
|
||||
|
||||
def check_dependencies() -> None:
|
||||
r"""Check the version of the required packages."""
|
||||
check_version("transformers>=4.41.2,<=4.51.2,!=4.46.0,!=4.46.1,!=4.46.2,!=4.46.3,!=4.47.0,!=4.47.1,!=4.48.0")
|
||||
check_version("transformers>=4.41.2,<=4.51.3,!=4.46.0,!=4.46.1,!=4.46.2,!=4.46.3,!=4.47.0,!=4.47.1,!=4.48.0")
|
||||
check_version("datasets>=2.16.0,<=3.5.0")
|
||||
check_version("accelerate>=0.34.0,<=1.6.0")
|
||||
check_version("peft>=0.14.0,<=0.15.1")
|
||||
|
Loading…
x
Reference in New Issue
Block a user