mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-08-24 06:42:52 +08:00
Merge pull request #6492 from hiyouga/hiyouga/add_deepseek3
[model] add deepseek3 model
Former-commit-id: 2382a5f0317d768ba8f4931977f5caed6057b3c0
This commit is contained in:
commit
bb664d2fc5
@@ -191,6 +191,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
|
|||||||
| [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 |
|
| [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 |
|
||||||
| [Command R](https://huggingface.co/CohereForAI) | 35B/104B | cohere |
|
| [Command R](https://huggingface.co/CohereForAI) | 35B/104B | cohere |
|
||||||
| [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
|
| [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
|
||||||
|
| [DeepSeek 2.5/3](https://huggingface.co/deepseek-ai) | 236B/685B | deepseek3 |
|
||||||
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
|
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
|
||||||
| [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
|
| [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
|
||||||
| [GLM-4](https://huggingface.co/THUDM) | 9B | glm4 |
|
| [GLM-4](https://huggingface.co/THUDM) | 9B | glm4 |
|
||||||
|
@@ -192,6 +192,7 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
|
|||||||
| [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 |
|
| [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 |
|
||||||
| [Command R](https://huggingface.co/CohereForAI) | 35B/104B | cohere |
|
| [Command R](https://huggingface.co/CohereForAI) | 35B/104B | cohere |
|
||||||
| [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
|
| [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
|
||||||
|
| [DeepSeek 2.5/3](https://huggingface.co/deepseek-ai) | 236B/685B | deepseek3 |
|
||||||
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
|
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
|
||||||
| [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
|
| [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
|
||||||
| [GLM-4](https://huggingface.co/THUDM) | 9B | glm4 |
|
| [GLM-4](https://huggingface.co/THUDM) | 9B | glm4 |
|
||||||
|
@@ -619,9 +619,8 @@ _register_template(
|
|||||||
|
|
||||||
|
|
||||||
_register_template(
|
_register_template(
|
||||||
name="deepseek_v2.5",
|
name="deepseek3",
|
||||||
format_user=StringFormatter(slots=["<|User|>{{content}}<|Assistant|>"]),
|
format_user=StringFormatter(slots=["<|User|>{{content}}<|Assistant|>"]),
|
||||||
format_system=StringFormatter(slots=["{{content}}"]),
|
|
||||||
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
|
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@@ -448,6 +448,7 @@ register_model_group(
|
|||||||
},
|
},
|
||||||
"DeepSeek-Coder-7B-Base": {
|
"DeepSeek-Coder-7B-Base": {
|
||||||
DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-7b-base-v1.5",
|
DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-7b-base-v1.5",
|
||||||
|
DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-7b-base-v1.5",
|
||||||
},
|
},
|
||||||
"DeepSeek-Coder-33B-Base": {
|
"DeepSeek-Coder-33B-Base": {
|
||||||
DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-33b-base",
|
DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-33b-base",
|
||||||
@@ -459,6 +460,7 @@ register_model_group(
|
|||||||
},
|
},
|
||||||
"DeepSeek-Coder-7B-Instruct": {
|
"DeepSeek-Coder-7B-Instruct": {
|
||||||
DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-7b-instruct-v1.5",
|
DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-7b-instruct-v1.5",
|
||||||
|
DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-7b-instruct-v1.5",
|
||||||
},
|
},
|
||||||
"DeepSeek-Coder-33B-Instruct": {
|
"DeepSeek-Coder-33B-Instruct": {
|
||||||
DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-33b-instruct",
|
DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-33b-instruct",
|
||||||
@@ -469,6 +471,33 @@ register_model_group(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
register_model_group(
|
||||||
|
models={
|
||||||
|
"DeepSeek-V2-236B-Chat-0628": {
|
||||||
|
DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-V2-Chat-0628",
|
||||||
|
DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-V2-Chat-0628",
|
||||||
|
},
|
||||||
|
"DeepSeek-V2.5-236B-Chat": {
|
||||||
|
DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-V2.5",
|
||||||
|
DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-V2.5",
|
||||||
|
},
|
||||||
|
"DeepSeek-V2.5-236B-Chat-1210": {
|
||||||
|
DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-V2.5-1210",
|
||||||
|
DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-V2.5-1210",
|
||||||
|
},
|
||||||
|
"DeepSeek-V3-685B-Base": {
|
||||||
|
DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-V3-Base",
|
||||||
|
DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-V3-Base",
|
||||||
|
},
|
||||||
|
"DeepSeek-V3-685B-Chat": {
|
||||||
|
DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-V3",
|
||||||
|
DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-V3",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
template="deepseek3",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
register_model_group(
|
register_model_group(
|
||||||
models={
|
models={
|
||||||
"EXAONE-3.0-7.8B-Instruct": {
|
"EXAONE-3.0-7.8B-Instruct": {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user