From f5350b103be3998b35e0899bba35af7cd5b6c80a Mon Sep 17 00:00:00 2001
From: qvlehao <74954167+Qwtdgh@users.noreply.github.com>
Date: Wed, 29 Jan 2025 12:16:26 +0800
Subject: [PATCH] [model] add deepseek-R1 & show think process (#6767)

Former-commit-id: 28417f862a1947a24663150ca55f421198b6d8eb
---
 README.md                            |  1 +
 README_zh.md                         |  1 +
 src/llamafactory/extras/constants.py | 32 ++++++++++++++++++++++++++++
 src/llamafactory/webui/chatter.py    |  1 +
 4 files changed, 35 insertions(+)

diff --git a/README.md b/README.md
index 7cf47d9e..0acf68c5 100644
--- a/README.md
+++ b/README.md
@@ -217,6 +217,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 | [Command R](https://huggingface.co/CohereForAI) | 35B/104B | cohere |
 | [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
 | [DeepSeek 2.5/3](https://huggingface.co/deepseek-ai) | 236B/685B | deepseek3 |
+| [DeepSeek R1](https://huggingface.co/deepseek-ai) | 1.5B/7B/8B/14B/32B/70B/671B | deepseekr1 |
 | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
 | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
 | [GLM-4](https://huggingface.co/THUDM) | 9B | glm4 |
diff --git a/README_zh.md b/README_zh.md
index bf5c3fd7..a20c6bce 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -219,6 +219,7 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
 | [Command R](https://huggingface.co/CohereForAI) | 35B/104B | cohere |
 | [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
 | [DeepSeek 2.5/3](https://huggingface.co/deepseek-ai) | 236B/685B | deepseek3 |
+| [DeepSeek R1](https://huggingface.co/deepseek-ai) | 1.5B/7B/8B/14B/32B/70B/671B | deepseekr1 |
 | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
 | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
 | [GLM-4](https://huggingface.co/THUDM) | 9B | glm4 |
diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py
index 2cf00205..e98aadbd 100644
--- a/src/llamafactory/extras/constants.py
+++ b/src/llamafactory/extras/constants.py
@@ -493,6 +493,38 @@ register_model_group(
             DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-V3",
             DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-V3",
         },
+        "DeepSeek-R1-1.5B-Distill": {
+            DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+            DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+        },
+        "DeepSeek-R1-7B-Distill": {
+            DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
+            DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
+        },
+        "DeepSeek-R1-8B-Distill": {
+            DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
+            DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
+        },
+        "DeepSeek-R1-14B-Distill": {
+            DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
+            DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
+        },
+        "DeepSeek-R1-32B-Distill": {
+            DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+            DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+        },
+        "DeepSeek-R1-70B-Distill": {
+            DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
+            DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
+        },
+        "DeepSeek-R1-671B-Zero": {
+            DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-R1-Zero",
+            DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-R1-Zero",
+        },
+        "DeepSeek-R1-671B": {
+            DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-R1",
+            DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-R1",
+        },
     },
     template="deepseek3",
 )
diff --git a/src/llamafactory/webui/chatter.py b/src/llamafactory/webui/chatter.py
index e9689df2..7abdf8b5 100644
--- a/src/llamafactory/webui/chatter.py
+++ b/src/llamafactory/webui/chatter.py
@@ -157,6 +157,7 @@ class WebChatModel(ChatModel):
             top_p=top_p,
             temperature=temperature,
         ):
+            new_text = '' if any(t in new_text for t in ('<think>', '</think>')) else new_text
             response += new_text
             if tools:
                 result = self.engine.template.extract_tool(response)