Mirror of https://github.com/hiyouga/LLaMA-Factory.git, synced 2025-12-23 23:30:36 +08:00
[misc] fix cache & pin transformers to 4.57.1 (#9638)

.github/workflows/docker.yml (vendored): 7 changed lines

@@ -99,7 +99,7 @@ jobs:
           tags: |
             docker.io/hiyouga/llamafactory:${{ steps.version.outputs.tag }}
           cache-from: type=gha
-          cache-to: type=gha,mode=max
+          cache-to: type=gha,mode=min

       - name: Build and push Docker image (NPU-A2)
         if: ${{ matrix.device == 'npu' && matrix.npu_type == 'a2' }}
@@ -113,7 +113,7 @@ jobs:
             docker.io/hiyouga/llamafactory:${{ steps.version.outputs.tag }}-npu-a2
             quay.io/ascend/llamafactory:${{ steps.version.outputs.tag }}-npu-a2
           cache-from: type=gha
-          cache-to: type=gha,mode=max
+          cache-to: type=gha,mode=min

       - name: Build and push Docker image (NPU-A3)
         if: ${{ matrix.device == 'npu' && matrix.npu_type == 'a3' }}
@@ -129,4 +129,5 @@ jobs:
             docker.io/hiyouga/llamafactory:${{ steps.version.outputs.tag }}-npu-a3
             quay.io/ascend/llamafactory:${{ steps.version.outputs.tag }}-npu-a3
           cache-from: type=gha
-          cache-to: type=gha,mode=max
+          cache-to: type=gha,mode=min
+          # https://docs.docker.com/build/cache/backends/#cache-mode
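
For context on the change above: with the GitHub Actions cache backend, mode=min exports only the layers of the final image, while mode=max also exports the layers of intermediate build stages, which is why the linked cache-mode documentation was added next to the setting.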

.github/workflows/tests.yml (vendored): 1 changed line

@@ -73,6 +73,7 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
+          python -m pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu
           python -m pip install ".[torch,dev]"

       - name: Install transformers
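
The added line installs CPU-only PyTorch wheels before the editable install, so the CI job does not pull CUDA builds. A quick sanity check that the CPU-only wheels were picked up might look like the following (illustrative snippet, not part of the workflow):

    import torch

    # CPU-only wheels from the cpu index report a "+cpu" local version and no CUDA runtime.
    print(torch.__version__)          # e.g. "2.5.1+cpu"
    print(torch.cuda.is_available())  # expected: False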

README.md: 27 changed lines

@@ -278,27 +278,21 @@ Read technical notes:

 | Model | Model size | Template |
 | ----------------------------------------------------------------- | -------------------------------- | -------------------- |
-| [Baichuan 2](https://huggingface.co/baichuan-inc) | 7B/13B | baichuan2 |
 | [BLOOM/BLOOMZ](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - |
-| [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 |
 | [Command R](https://huggingface.co/CohereForAI) | 35B/104B | cohere |
-| [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
+| [DeepSeek (LLM/Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
-| [DeepSeek 2.5/3](https://huggingface.co/deepseek-ai) | 236B/671B | deepseek3 |
+| [DeepSeek 3-3.2](https://huggingface.co/deepseek-ai) | 236B/671B | deepseek3 |
 | [DeepSeek R1 (Distill)](https://huggingface.co/deepseek-ai) | 1.5B/7B/8B/14B/32B/70B/671B | deepseekr1 |
 | [ERNIE-4.5](https://huggingface.co/baidu) | 0.3B/21B/300B | ernie/ernie_nothink |
-| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
+| [Falcon/Falcon H1](https://huggingface.co/tiiuae) | 0.5B/1.5B/3B/7B/11B/34B/40B/180B | falcon/falcon_h1 |
-| [Falcon-H1](https://huggingface.co/tiiuae) | 0.5B/1.5B/3B/7B/34B | falcon_h1 |
 | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma/gemma2 |
 | [Gemma 3/Gemma 3n](https://huggingface.co/google) | 270M/1B/4B/6B/8B/12B/27B | gemma3/gemma3n |
 | [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/zai-org) | 9B/32B | glm4/glmz1 |
-| [GLM-4.1V](https://huggingface.co/zai-org) | 9B | glm4v |
 | [GLM-4.5/GLM-4.5(6)V](https://huggingface.co/zai-org) | 9B/106B/355B | glm4_moe/glm4_5v |
 | [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - |
-| [GPT-OSS](https://huggingface.co/openai) | 20B/120B | gpt |
+| [GPT-OSS](https://huggingface.co/openai) | 20B/120B | gpt_oss |
-| [Granite 3.0-3.3](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 |
+| [Granite 3-4](https://huggingface.co/ibm-granite) | 1B/2B/3B/7B/8B | granite3/granite4 |
-| [Granite 4](https://huggingface.co/ibm-granite) | 7B | granite4 |
 | [Hunyuan (MT)](https://huggingface.co/tencent/) | 7B | hunyuan |
-| [Index](https://huggingface.co/IndexTeam) | 1.9B | index |
 | [InternLM 2-3](https://huggingface.co/internlm) | 7B/8B/20B | intern2 |
 | [InternVL 2.5-3.5](https://huggingface.co/OpenGVLab) | 1B/2B/4B/8B/14B/30B/38B/78B/241B | intern_vl |
 | [InternLM/Intern-S1-mini](https://huggingface.co/internlm/) | 8B | intern_s1 |
@@ -312,16 +306,13 @@ Read technical notes:
 | [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | llava |
 | [LLaVA-NeXT](https://huggingface.co/llava-hf) | 7B/8B/13B/34B/72B/110B | llava_next |
 | [LLaVA-NeXT-Video](https://huggingface.co/llava-hf) | 7B/34B | llava_next_video |
-| [MiMo](https://huggingface.co/XiaomiMiMo) | 7B | mimo |
+| [MiMo](https://huggingface.co/XiaomiMiMo) | 7B/309B | mimo/mimo_v2 |
-| [MiMo-v2](https://huggingface.co/XiaomiMiMo) | 309B | mimo_v2 |
 | [MiniCPM 1-4.1](https://huggingface.co/openbmb) | 0.5B/1B/2B/4B/8B | cpm/cpm3/cpm4 |
 | [MiniCPM-o-2.6/MiniCPM-V-2.6](https://huggingface.co/openbmb) | 8B | minicpm_o/minicpm_v |
-| [Ministral(3)/Mistral-Nemo](https://huggingface.co/mistralai) | 3B/8B/12B/14B | ministral/ministral3 |
+| [Ministral 3](https://huggingface.co/mistralai) | 3B/8B/14B | ministral3 |
 | [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
-| [Mistral Small](https://huggingface.co/mistralai) | 24B | mistral_small |
 | [OLMo](https://huggingface.co/allenai) | 1B/7B | - |
 | [PaliGemma/PaliGemma2](https://huggingface.co/google) | 3B/10B/28B | paligemma |
-| [Phi-1.5/Phi-2](https://huggingface.co/microsoft) | 1.3B/2.7B | - |
 | [Phi-3/Phi-3.5](https://huggingface.co/microsoft) | 4B/14B | phi |
 | [Phi-3-small](https://huggingface.co/microsoft) | 7B | phi_small |
 | [Phi-4](https://huggingface.co/microsoft) | 14B | phi4 |
@@ -334,13 +325,9 @@ Read technical notes:
 | [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl |
 | [Qwen3-VL](https://huggingface.co/Qwen) | 2B/4B/8B/30B/32B/235B | qwen3_vl |
 | [Seed (OSS/Coder)](https://huggingface.co/ByteDance-Seed) | 8B/36B | seed_oss/seed_coder |
-| [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 |
 | [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
-| [TeleChat2](https://huggingface.co/Tele-AI) | 3B/7B/35B/115B | telechat2 |
 | [VibeThinker-1.5B](https://huggingface.co/WeiboAI) | 1.5B | qwen3 |
-| [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse |
 | [Yi/Yi-1.5 (Code)](https://huggingface.co/01-ai) | 1.5B/6B/9B/34B | yi |
-| [Yi-VL](https://huggingface.co/01-ai) | 6B/34B | yi_vl |
 | [Yuan 2](https://huggingface.co/IEITYuan) | 2B/51B/102B | yuan |

 > [!NOTE]

README_zh.md: 27 changed lines

@@ -280,27 +280,21 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc

 | 模型名 | 参数量 | Template |
 | ----------------------------------------------------------------- | -------------------------------- | -------------------- |
-| [Baichuan 2](https://huggingface.co/baichuan-inc) | 7B/13B | baichuan2 |
 | [BLOOM/BLOOMZ](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - |
-| [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 |
 | [Command R](https://huggingface.co/CohereForAI) | 35B/104B | cohere |
-| [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
+| [DeepSeek (LLM/Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
-| [DeepSeek 2.5/3](https://huggingface.co/deepseek-ai) | 236B/671B | deepseek3 |
+| [DeepSeek 3-3.2](https://huggingface.co/deepseek-ai) | 236B/671B | deepseek3 |
 | [DeepSeek R1 (Distill)](https://huggingface.co/deepseek-ai) | 1.5B/7B/8B/14B/32B/70B/671B | deepseekr1 |
 | [ERNIE-4.5](https://huggingface.co/baidu) | 0.3B/21B/300B | ernie/ernie_nothink |
-| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
+| [Falcon/Falcon H1](https://huggingface.co/tiiuae) | 0.5B/1.5B/3B/7B/11B/34B/40B/180B | falcon/falcon_h1 |
-| [Falcon-H1](https://huggingface.co/tiiuae) | 0.5B/1.5B/3B/7B/34B | falcon_h1 |
 | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma/gemma2 |
 | [Gemma 3/Gemma 3n](https://huggingface.co/google) | 270M/1B/4B/6B/8B/12B/27B | gemma3/gemma3n |
 | [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/zai-org) | 9B/32B | glm4/glmz1 |
-| [GLM-4.1V](https://huggingface.co/zai-org) | 9B | glm4v |
 | [GLM-4.5/GLM-4.5(6)V](https://huggingface.co/zai-org) | 9B/106B/355B | glm4_moe/glm4_5v |
 | [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - |
-| [GPT-OSS](https://huggingface.co/openai) | 20B/120B | gpt |
+| [GPT-OSS](https://huggingface.co/openai) | 20B/120B | gpt_oss |
-| [Granite 3.0-3.3](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 |
+| [Granite 3-4](https://huggingface.co/ibm-granite) | 1B/2B/3B/7B/8B | granite3/granite4 |
-| [Granite 4](https://huggingface.co/ibm-granite) | 7B | granite4 |
 | [Hunyuan (MT)](https://huggingface.co/tencent/) | 7B | hunyuan |
-| [Index](https://huggingface.co/IndexTeam) | 1.9B | index |
 | [InternLM 2-3](https://huggingface.co/internlm) | 7B/8B/20B | intern2 |
 | [InternVL 2.5-3.5](https://huggingface.co/OpenGVLab) | 1B/2B/4B/8B/14B/30B/38B/78B/241B | intern_vl |
 | [InternLM/Intern-S1-mini](https://huggingface.co/internlm/) | 8B | intern_s1 |
@@ -314,16 +308,13 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
 | [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | llava |
 | [LLaVA-NeXT](https://huggingface.co/llava-hf) | 7B/8B/13B/34B/72B/110B | llava_next |
 | [LLaVA-NeXT-Video](https://huggingface.co/llava-hf) | 7B/34B | llava_next_video |
-| [MiMo](https://huggingface.co/XiaomiMiMo) | 7B | mimo |
+| [MiMo](https://huggingface.co/XiaomiMiMo) | 7B/309B | mimo/mimo_v2 |
-| [MiMo-v2](https://huggingface.co/XiaomiMiMo) | 309B | mimo_v2 |
 | [MiniCPM 1-4.1](https://huggingface.co/openbmb) | 0.5B/1B/2B/4B/8B | cpm/cpm3/cpm4 |
 | [MiniCPM-o-2.6/MiniCPM-V-2.6](https://huggingface.co/openbmb) | 8B | minicpm_o/minicpm_v |
-| [Ministral(3)/Mistral-Nemo](https://huggingface.co/mistralai) | 3B/8B/12B/14B | ministral/ministral3 |
+| [Ministral 3](https://huggingface.co/mistralai) | 3B/8B/14B | ministral3 |
 | [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
-| [Mistral Small](https://huggingface.co/mistralai) | 24B | mistral_small |
 | [OLMo](https://huggingface.co/allenai) | 1B/7B | - |
 | [PaliGemma/PaliGemma2](https://huggingface.co/google) | 3B/10B/28B | paligemma |
-| [Phi-1.5/Phi-2](https://huggingface.co/microsoft) | 1.3B/2.7B | - |
 | [Phi-3/Phi-3.5](https://huggingface.co/microsoft) | 4B/14B | phi |
 | [Phi-3-small](https://huggingface.co/microsoft) | 7B | phi_small |
 | [Phi-4](https://huggingface.co/microsoft) | 14B | phi4 |
@@ -336,13 +327,9 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
 | [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl |
 | [Qwen3-VL](https://huggingface.co/Qwen) | 2B/4B/8B/30B/32B/235B | qwen3_vl |
 | [Seed (OSS/Coder)](https://huggingface.co/ByteDance-Seed) | 8B/36B | seed_oss/seed_coder |
-| [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 |
 | [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
-| [TeleChat2](https://huggingface.co/Tele-AI) | 3B/7B/35B/115B | telechat2 |
 | [VibeThinker-1.5B](https://huggingface.co/WeiboAI) | 1.5B | qwen3 |
-| [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse |
 | [Yi/Yi-1.5 (Code)](https://huggingface.co/01-ai) | 1.5B/6B/9B/34B | yi |
-| [Yi-VL](https://huggingface.co/01-ai) | 6B/34B | yi_vl |
 | [Yuan 2](https://huggingface.co/IEITYuan) | 2B/51B/102B | yuan |

 > [!NOTE]

@@ -1,6 +1,6 @@
 # core deps
 transformers>=4.49.0,<=4.56.2,!=4.52.0; python_version < '3.10'
-transformers>=4.49.0,<=4.57.3,!=4.52.0,!=4.57.0; python_version >= '3.10'
+transformers>=4.49.0,<=4.57.1,!=4.52.0,!=4.57.0; python_version >= '3.10'
 datasets>=2.16.0,<=4.0.0
 accelerate>=1.3.0,<=1.11.0
 peft>=0.14.0,<=0.17.1
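
The tightened specifier above caps transformers at 4.57.1 for Python >= 3.10 while still excluding 4.52.0 and 4.57.0. A small check with the packaging library (illustrative only) shows which versions the new range accepts:

    from packaging.specifiers import SpecifierSet
    from packaging.version import Version

    # The specifier from the updated requirements line (Python >= 3.10 branch).
    spec = SpecifierSet(">=4.49.0,<=4.57.1,!=4.52.0,!=4.57.0")

    for candidate in ["4.52.0", "4.57.0", "4.57.1", "4.57.3"]:
        print(candidate, Version(candidate) in spec)
    # 4.52.0 False, 4.57.0 False, 4.57.1 True, 4.57.3 False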

@@ -1166,7 +1166,7 @@ register_template(


 register_template(
-    name="gpt",
+    name="gpt_oss",
     format_user=StringFormatter(slots=["<|start|>user<|message|>{{content}}<|end|><|start|>assistant"]),
     format_assistant=StringFormatter(slots=["{{content}}<|end|>"]),
     format_system=StringFormatter(slots=["<|start|>system<|message|>{{content}}<|end|>"]),
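
For reference, substituting {{content}} by hand into the slot strings above gives a rough picture of the rendered GPT-OSS prompt (an illustrative sketch, not how the template engine is actually invoked):

    # Illustrative only: fill {{content}} into the slot strings shown in the diff.
    system = "<|start|>system<|message|>{{content}}<|end|>"
    user = "<|start|>user<|message|>{{content}}<|end|><|start|>assistant"
    assistant = "{{content}}<|end|>"

    prompt = (
        system.replace("{{content}}", "You are a helpful assistant.")
        + user.replace("{{content}}", "Hello!")
        + assistant.replace("{{content}}", "Hi! How can I help?")
    )
    print(prompt)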

@@ -1067,7 +1067,7 @@ register_model_group(
             DownloadSource.MODELSCOPE: "openai/gpt-oss-120b",
         },
     },
-    template="gpt",
+    template="gpt_oss",
 )


@@ -1995,6 +1995,18 @@ register_model_group(

 register_model_group(
     models={
+        "Ministral-3-3B-Base-2512": {
+            DownloadSource.DEFAULT: "mistralai/Ministral-3-3B-Base-2512",
+            DownloadSource.MODELSCOPE: "mistralai/Ministral-3-3B-Base-2512",
+        },
+        "Ministral-3-8B-Base-2512": {
+            DownloadSource.DEFAULT: "mistralai/Ministral-3-8B-Base-2512",
+            DownloadSource.MODELSCOPE: "mistralai/Ministral-3-8B-Base-2512",
+        },
+        "Ministral-3-14B-Base-2512": {
+            DownloadSource.DEFAULT: "mistralai/Ministral-3-14B-Base-2512",
+            DownloadSource.MODELSCOPE: "mistralai/Ministral-3-14B-Base-2512",
+        },
         "Ministral-3-3B-Instruct-2512": {
             DownloadSource.DEFAULT: "mistralai/Ministral-3-3B-Instruct-2512",
             DownloadSource.MODELSCOPE: "mistralai/Ministral-3-3B-Instruct-2512",
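
Each entry added above maps a display name to one repository id per download source. A minimal sketch of how such a registry can resolve a name (hypothetical helper, not LLaMA-Factory's actual code):

    # Hypothetical name -> repo-id lookup over a registry shaped like the diff above.
    REGISTRY = {
        "Ministral-3-3B-Base-2512": {
            "default": "mistralai/Ministral-3-3B-Base-2512",
            "modelscope": "mistralai/Ministral-3-3B-Base-2512",
        },
    }

    def resolve(name: str, source: str = "default") -> str:
        return REGISTRY[name][source]

    print(resolve("Ministral-3-3B-Base-2512", source="modelscope"))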

@@ -94,7 +94,7 @@ def check_version(requirement: str, mandatory: bool = False) -> None:

 def check_dependencies() -> None:
     r"""Check the version of the required packages."""
-    check_version("transformers>=4.49.0,<=4.57.3")
+    check_version("transformers>=4.49.0,<=4.57.1")
     check_version("datasets>=2.16.0,<=4.0.0")
     check_version("accelerate>=1.3.0,<=1.11.0")
     check_version("peft>=0.14.0,<=0.17.1")
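
The check_dependencies update keeps the runtime check in sync with the requirements pin. A minimal sketch of a packaging-based version check, assuming that is roughly what check_version does (the real implementation is not shown in this diff):

    from importlib.metadata import version
    from packaging.requirements import Requirement

    def check_version_sketch(requirement: str) -> None:
        """Raise if the installed package does not satisfy the given specifier."""
        req = Requirement(requirement)
        installed = version(req.name)
        if installed not in req.specifier:
            raise RuntimeError(f"{req.name}=={installed} does not satisfy '{requirement}'.")

    check_version_sketch("transformers>=4.49.0,<=4.57.1")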