diff --git a/README.md b/README.md index 7cf47d9e..0acf68c5 100644 --- a/README.md +++ b/README.md @@ -217,6 +217,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ | [Command R](https://huggingface.co/CohereForAI) | 35B/104B | cohere | | [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek | | [DeepSeek 2.5/3](https://huggingface.co/deepseek-ai) | 236B/685B | deepseek3 | +| [DeepSeek R1](https://huggingface.co/deepseek-ai) | 1.5B/7B/8B/14B/32B/70B/671B | deepseekr1 | | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma | | [GLM-4](https://huggingface.co/THUDM) | 9B | glm4 | diff --git a/README_zh.md b/README_zh.md index bf5c3fd7..a20c6bce 100644 --- a/README_zh.md +++ b/README_zh.md @@ -219,6 +219,7 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272 | [Command R](https://huggingface.co/CohereForAI) | 35B/104B | cohere | | [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek | | [DeepSeek 2.5/3](https://huggingface.co/deepseek-ai) | 236B/685B | deepseek3 | +| [DeepSeek R1](https://huggingface.co/deepseek-ai) | 1.5B/7B/8B/14B/32B/70B/671B | deepseekr1 | | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma | | [GLM-4](https://huggingface.co/THUDM) | 9B | glm4 | diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 2cf00205..e98aadbd 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -493,6 +493,38 @@ register_model_group( DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-V3", DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-V3", }, + "DeepSeek-R1-1.5B-Distill": { + DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + 
DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + }, + "DeepSeek-R1-7B-Distill": { + DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", + DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", + }, + "DeepSeek-R1-8B-Distill": { + DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", + DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", + }, + "DeepSeek-R1-14B-Distill": { + DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", + DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", + }, + "DeepSeek-R1-32B-Distill": { + DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + }, + "DeepSeek-R1-70B-Distill": { + DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + }, + "DeepSeek-R1-671B-Zero": { + DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-R1-Zero", + DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-R1-Zero", + }, + "DeepSeek-R1-671B": { + DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-R1", + DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-R1", + }, }, template="deepseek3", ) diff --git a/src/llamafactory/webui/chatter.py b/src/llamafactory/webui/chatter.py index e9689df2..7abdf8b5 100644 --- a/src/llamafactory/webui/chatter.py +++ b/src/llamafactory/webui/chatter.py @@ -157,6 +157,7 @@ class WebChatModel(ChatModel): top_p=top_p, temperature=temperature, ): + new_text = '' if any(t in new_text for t in ('<think>', '</think>')) else new_text response += new_text if tools: result = self.engine.template.extract_tool(response)