diff --git a/README.md b/README.md
index b0f71722..72d73688 100644
--- a/README.md
+++ b/README.md
@@ -107,6 +107,8 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 
 ## Changelog
 
+[25/04/14] We supported fine-tuning the **[GLM-Z1](https://huggingface.co/THUDM/GLM-Z1-9B-0414)** and **[Kimi-VL](https://huggingface.co/moonshotai/Kimi-VL-A3B-Instruct)** models.
+
 [25/04/06] We supported fine-tuning the **[Llama 4](https://ai.meta.com/blog/llama-4-multimodal-intelligence/)** model. See [PR #7611](https://github.com/hiyouga/LLaMA-Factory/pull/7611) to get started.
 
 [25/03/31] We supported fine-tuning the **[Qwen2.5 Omni](https://qwenlm.github.io/blog/qwen2.5-omni/)** model. See [PR #7537](https://github.com/hiyouga/LLaMA-Factory/pull/7537) to get started.
@@ -123,7 +125,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 
 [25/02/05] We supported fine-tuning the **[Qwen2-Audio](https://huggingface.co/Qwen/Qwen2-Audio-7B-Instruct)** and **[MiniCPM-o-2.6](https://huggingface.co/openbmb/MiniCPM-o-2_6)** models on audio understanding tasks.
 
-[25/01/31] We supported fine-tuning the **[DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1)** and **[Qwen2.5-VL](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct)** model.
+[25/01/31] We supported fine-tuning the **[DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1)** and **[Qwen2.5-VL](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct)** models.
 
 [25/01/15] We supported the **[APOLLO](https://arxiv.org/abs/2412.05270)** optimizer. See [examples](examples/README.md) for usage.
 
@@ -239,12 +241,13 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
 | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
 | [Gemma 3](https://huggingface.co/google) | 1B/4B/12B/27B | gemma3/gemma (1B) |
-| [GLM-4/GLM-4-0414](https://huggingface.co/THUDM)\* | 9B/32B | glm4 |
+| [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/THUDM) | 9B/32B | glm4 |
 | [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - |
 | [Granite 3.0-3.1](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 |
 | [Hunyuan](https://huggingface.co/tencent/) | 7B | hunyuan |
 | [Index](https://huggingface.co/IndexTeam) | 1.9B | index |
 | [InternLM 2-3](https://huggingface.co/internlm) | 7B/8B/20B | intern2 |
+| [Kimi-VL](https://huggingface.co/moonshotai) | 16B | kimi_vl |
 | [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |
 | [Llama 2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 |
 | [Llama 3-3.3](https://huggingface.co/meta-llama) | 1B/3B/8B/70B | llama3 |
diff --git a/README_zh.md b/README_zh.md
index 0b8a5b50..dc0aabe2 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -110,6 +110,8 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
 
 ## 更新日志
 
+[25/04/14] 我们支持了 **[GLM-Z1](https://huggingface.co/THUDM/GLM-Z1-9B-0414)** 和 **[Kimi-VL](https://huggingface.co/moonshotai/Kimi-VL-A3B-Instruct)** 模型的微调。
+
 [25/04/06] 我们支持了 **[Llama 4](https://ai.meta.com/blog/llama-4-multimodal-intelligence/)** 模型的微调。查看 [PR #7611](https://github.com/hiyouga/LLaMA-Factory/pull/7611) 以使用。
 
 [25/03/31] 我们支持了 **[Qwen2.5 Omni](https://qwenlm.github.io/blog/qwen2.5-omni/)** 模型的微调。查看 [PR #7537](https://github.com/hiyouga/LLaMA-Factory/pull/7537) 以使用。
@@ -242,12 +244,13 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
 | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
 | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
 | [Gemma 3](https://huggingface.co/google) | 1B/4B/12B/27B | gemma3/gemma (1B) |
-| [GLM-4/GLM-4-0414](https://huggingface.co/THUDM)\* | 9B/32B | glm4 |
+| [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/THUDM) | 9B/32B | glm4 |
 | [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - |
 | [Granite 3.0-3.1](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 |
 | [Hunyuan](https://huggingface.co/tencent/) | 7B | hunyuan |
 | [Index](https://huggingface.co/IndexTeam) | 1.9B | index |
 | [InternLM 2-3](https://huggingface.co/internlm) | 7B/8B/20B | intern2 |
+| [Kimi-VL](https://huggingface.co/moonshotai) | 16B | kimi_vl |
 | [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |
 | [Llama 2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 |
 | [Llama 3-3.3](https://huggingface.co/meta-llama) | 1B/3B/8B/70B | llama3 |
diff --git a/requirements.txt b/requirements.txt
index 67982a3a..7c26caa6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-transformers>=4.41.2,<=4.51.2,!=4.46.*,!=4.47.*,!=4.48.0
+transformers>=4.41.2,<=4.51.3,!=4.46.*,!=4.47.*,!=4.48.0
 datasets>=2.16.0,<=3.5.0
 accelerate>=0.34.0,<=1.6.0
 peft>=0.14.0,<=0.15.1
diff --git a/src/llamafactory/__init__.py b/src/llamafactory/__init__.py
index f72b7198..607355db 100644
--- a/src/llamafactory/__init__.py
+++ b/src/llamafactory/__init__.py
@@ -19,7 +19,7 @@ Level:
 
 Dependency graph:
   main:
-    transformers>=4.41.2,<=4.51.2,!=4.46.*,!=4.47.*,!=4.48.0
+    transformers>=4.41.2,<=4.51.3,!=4.46.*,!=4.47.*,!=4.48.0
     datasets>=2.16.0,<=3.5.0
     accelerate>=0.34.0,<=1.6.0
     peft>=0.14.0,<=0.15.1
diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py
index ffa5cefa..01952fe9 100644
--- a/src/llamafactory/extras/constants.py
+++ b/src/llamafactory/extras/constants.py
@@ -725,25 +725,25 @@ register_model_group(
             DownloadSource.DEFAULT: "THUDM/glm-4-9b-chat-1m",
             DownloadSource.MODELSCOPE: "ZhipuAI/glm-4-9b-chat-1m",
         },
-        "GLM-4-9B-Chat-0414": {
-            DownloadSource.DEFAULT: "THUDM/GLM-4-9B-Chat-0414",
-            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-9B-Chat-0414",
+        "GLM-4-9B-0414-Chat": {
+            DownloadSource.DEFAULT: "THUDM/GLM-4-9B-0414",
+            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-9B-0414",
         },
         "GLM-4-32B-0414": {
+            DownloadSource.DEFAULT: "THUDM/GLM-4-32B-Base-0414",
+            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-32B-Base-0414",
+        },
+        "GLM-4-32B-0414-Chat": {
             DownloadSource.DEFAULT: "THUDM/GLM-4-32B-0414",
             DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-32B-0414",
         },
-        "GLM-4-32B-Chat-0414": {
-            DownloadSource.DEFAULT: "THUDM/GLM-4-32B-Chat-0414",
-            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-32B-Chat-0414",
+        "GLM-Z1-9B-0414-Chat": {
+            DownloadSource.DEFAULT: "THUDM/GLM-Z1-9B-0414",
+            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-Z1-9B-0414",
         },
-        "GLM-4-Z1-9B-Chat-0414": {
-            DownloadSource.DEFAULT: "THUDM/GLM-4-Z1-9B-0414",
-            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-Z1-9B-0414",
-        },
-        "GLM-4-Z1-32B-Chat-0414": {
-            DownloadSource.DEFAULT: "THUDM/GLM-4-Z1-32B-0414",
-            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-Z1-32B-0414",
+        "GLM-Z1-32B-0414-Chat": {
+            DownloadSource.DEFAULT: "THUDM/GLM-Z1-32B-0414",
+            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-Z1-32B-0414",
         },
     },
     template="glm4",
diff --git a/src/llamafactory/extras/misc.py b/src/llamafactory/extras/misc.py
index 19eee9b3..099aa46f 100644
--- a/src/llamafactory/extras/misc.py
+++ b/src/llamafactory/extras/misc.py
@@ -89,7 +89,7 @@ def check_version(requirement: str, mandatory: bool = False) -> None:
 
 def check_dependencies() -> None:
     r"""Check the version of the required packages."""
-    check_version("transformers>=4.41.2,<=4.51.2,!=4.46.0,!=4.46.1,!=4.46.2,!=4.46.3,!=4.47.0,!=4.47.1,!=4.48.0")
+    check_version("transformers>=4.41.2,<=4.51.3,!=4.46.0,!=4.46.1,!=4.46.2,!=4.46.3,!=4.47.0,!=4.47.1,!=4.48.0")
     check_version("datasets>=2.16.0,<=3.5.0")
     check_version("accelerate>=0.34.0,<=1.6.0")
     check_version("peft>=0.14.0,<=0.15.1")
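
The three version-pin hunks above (`requirements.txt`, the dependency-graph docstring in `src/llamafactory/__init__.py`, and `check_dependencies` in `src/llamafactory/extras/misc.py`) raise the transformers ceiling from 4.51.2 to 4.51.3, the release targeted by the new GLM-Z1 and Kimi-VL support. For illustration, here is a minimal sketch of how such a requirement string can be enforced at runtime. It only mirrors the shape of `check_version`; the repository's real helper may be implemented differently, and the `packaging` dependency is an assumption of this sketch.

```python
# Minimal sketch of a runtime version gate in the spirit of check_version().
# Assumes the third-party `packaging` library; the repo's actual helper may
# instead delegate to transformers' own version utilities.
from importlib.metadata import version as installed_version

from packaging.requirements import Requirement


def check_version(requirement: str, mandatory: bool = False) -> None:
    """Warn (or raise, if mandatory) when the installed package violates `requirement`."""
    req = Requirement(requirement)  # parses name + PEP 440 specifier set
    current = installed_version(req.name)  # e.g. "4.51.3"
    if not req.specifier.contains(current, prereleases=True):
        message = f"{req.name}=={current} does not satisfy '{requirement}'"
        if mandatory:
            raise RuntimeError(message)
        print(f"WARNING: {message}")


# The exact constraint from the diff: 4.51.3 is now allowed, 4.48.0 is not.
check_version("transformers>=4.41.2,<=4.51.3,!=4.46.*,!=4.47.*,!=4.48.0")
```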
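The `constants.py` hunk registers the renamed GLM entries through `register_model_group`, mapping each display name to one repo id per `DownloadSource` plus a shared chat template. A minimal sketch of that registry pattern follows; `SUPPORTED_MODELS`, `DEFAULT_TEMPLATE`, and the exact function signature are illustrative stand-ins, not the repository's actual internals.

```python
# Minimal sketch of the registry pattern behind register_model_group().
# SUPPORTED_MODELS and DEFAULT_TEMPLATE are hypothetical names for this sketch.
from enum import Enum, unique
from typing import Optional


@unique
class DownloadSource(str, Enum):
    DEFAULT = "hf"  # Hugging Face Hub
    MODELSCOPE = "ms"  # ModelScope mirror


SUPPORTED_MODELS: dict[str, dict[DownloadSource, str]] = {}
DEFAULT_TEMPLATE: dict[str, str] = {}


def register_model_group(
    models: dict[str, dict[DownloadSource, str]],
    template: Optional[str] = None,
) -> None:
    """Record each display name -> per-source repo ids, plus the shared chat template."""
    for name, sources in models.items():
        SUPPORTED_MODELS[name] = sources
        if template is not None:
            DEFAULT_TEMPLATE[name] = template


register_model_group(
    models={
        "GLM-Z1-9B-0414-Chat": {
            DownloadSource.DEFAULT: "THUDM/GLM-Z1-9B-0414",
            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-Z1-9B-0414",
        },
    },
    template="glm4",
)

# Resolution prefers the configured mirror and falls back to DEFAULT:
sources = SUPPORTED_MODELS["GLM-Z1-9B-0414-Chat"]
repo_id = sources.get(DownloadSource.MODELSCOPE, sources[DownloadSource.DEFAULT])
print(repo_id, DEFAULT_TEMPLATE["GLM-Z1-9B-0414-Chat"])  # ZhipuAI/GLM-Z1-9B-0414 glm4
```

Keying every entry by an explicit `DownloadSource` is what lets one display name (e.g. `GLM-Z1-9B-0414-Chat`) resolve to different hosting organizations (`THUDM` on the Hub, `ZhipuAI` on ModelScope) without touching the training code.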