diff --git a/README.md b/README.md
index 390c0777..4535fd88 100644
--- a/README.md
+++ b/README.md
@@ -191,6 +191,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 | [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 |
 | [Command R](https://huggingface.co/CohereForAI) | 35B/104B | cohere |
 | [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
+| [DeepSeek 2.5/3](https://huggingface.co/deepseek-ai) | 236B/685B | deepseek3 |
 | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
 | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
 | [GLM-4](https://huggingface.co/THUDM) | 9B | glm4 |
diff --git a/README_zh.md b/README_zh.md
index 994c1683..998779ea 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -192,6 +192,7 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
 | [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 |
 | [Command R](https://huggingface.co/CohereForAI) | 35B/104B | cohere |
 | [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
+| [DeepSeek 2.5/3](https://huggingface.co/deepseek-ai) | 236B/685B | deepseek3 |
 | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
 | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
 | [GLM-4](https://huggingface.co/THUDM) | 9B | glm4 |
diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py
index 2a64eb7c..5768cf7b 100644
--- a/src/llamafactory/data/template.py
+++ b/src/llamafactory/data/template.py
@@ -619,9 +619,8 @@ _register_template(
 
 
 _register_template(
-    name="deepseek_v2.5",
+    name="deepseek3",
     format_user=StringFormatter(slots=["<|User|>{{content}}<|Assistant|>"]),
-    format_system=StringFormatter(slots=["{{content}}"]),
     format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
 )
 
diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py
index 00a77d04..061cfd26 100644
--- a/src/llamafactory/extras/constants.py
+++ b/src/llamafactory/extras/constants.py
@@ -448,6 +448,7 @@ register_model_group(
         },
         "DeepSeek-Coder-7B-Base": {
             DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-7b-base-v1.5",
+            DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-7b-base-v1.5",
         },
         "DeepSeek-Coder-33B-Base": {
             DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-33b-base",
@@ -459,6 +460,7 @@ register_model_group(
         },
         "DeepSeek-Coder-7B-Instruct": {
             DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-7b-instruct-v1.5",
+            DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-7b-instruct-v1.5",
         },
         "DeepSeek-Coder-33B-Instruct": {
             DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-33b-instruct",
@@ -469,6 +471,33 @@ register_model_group(
 )
 
 
+register_model_group(
+    models={
+        "DeepSeek-V2-236B-Chat-0628": {
+            DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-V2-Chat-0628",
+            DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-V2-Chat-0628",
+        },
+        "DeepSeek-V2.5-236B-Chat": {
+            DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-V2.5",
+            DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-V2.5",
+        },
+        "DeepSeek-V2.5-236B-Chat-1210": {
+            DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-V2.5-1210",
+            DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-V2.5-1210",
+        },
+        "DeepSeek-V3-685B-Base": {
+            DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-V3-Base",
+            DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-V3-Base",
+        },
+        "DeepSeek-V3-685B-Chat": {
+            DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-V3",
+            DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-V3",
+        },
+    },
+    template="deepseek3",
+)
+
+
 register_model_group(
     models={
         "EXAONE-3.0-7.8B-Instruct": {