update baichuan2 template

Former-commit-id: 0531886e1f
2026-03-13 07:26:00 +08:00 · 2023-09-06 21:43:06 +08:00
parent 0ba72273d2
commit 51f662860d
4 changed files with 70 additions and 27 deletions
--- a/README.md
+++ b/README.md
@@ -49,14 +49,14 @@
 ## Supported Models

 | Model                                                    | Model size                  | Default module    | Template  |
-| -------------------------------------------------------- | --------------------------- | ----------------- |----------|
+| -------------------------------------------------------- | --------------------------- | ----------------- | --------- |
 | [LLaMA](https://github.com/facebookresearch/llama)       | 7B/13B/33B/65B              | q_proj,v_proj     | -         |
 | [LLaMA-2](https://huggingface.co/meta-llama)             | 7B/13B/70B                  | q_proj,v_proj     | llama2    |
 | [BLOOM](https://huggingface.co/bigscience/bloom)         | 560M/1.1B/1.7B/3B/7.1B/176B | query_key_value   | -         |
 | [BLOOMZ](https://huggingface.co/bigscience/bloomz)       | 560M/1.1B/1.7B/3B/7.1B/176B | query_key_value   | -         |
 | [Falcon](https://huggingface.co/tiiuae/falcon-7b)        | 7B/40B                      | query_key_value   | -         |
 | [Baichuan](https://github.com/baichuan-inc/baichuan-13B) | 7B/13B                      | W_pack            | baichuan  |
-| [Baichuan2](https://github.com/baichuan-inc/Baichuan2)   | 7B/13B                      | W_pack            | baichuan |
+| [Baichuan2](https://github.com/baichuan-inc/Baichuan2)   | 7B/13B                      | W_pack            | baichuan2 |
 | [InternLM](https://github.com/InternLM/InternLM)         | 7B                          | q_proj,v_proj     | intern    |
 | [Qwen](https://github.com/QwenLM/Qwen-7B)                | 7B                          | c_attn            | chatml    |
 | [XVERSE](https://github.com/xverse-ai/XVERSE-13B)        | 13B                         | q_proj,v_proj     | xverse    |
--- a/README_zh.md
+++ b/README_zh.md
@@ -49,14 +49,14 @@
 ## 模型

 | 模型名                                                   | 模型大小                     | 默认模块           | Template  |
-| -------------------------------------------------------- | --------------------------- | ----------------- |----------|
+| -------------------------------------------------------- | --------------------------- | ----------------- | --------- |
 | [LLaMA](https://github.com/facebookresearch/llama)       | 7B/13B/33B/65B              | q_proj,v_proj     | -         |
 | [LLaMA-2](https://huggingface.co/meta-llama)             | 7B/13B/70B                  | q_proj,v_proj     | llama2    |
 | [BLOOM](https://huggingface.co/bigscience/bloom)         | 560M/1.1B/1.7B/3B/7.1B/176B | query_key_value   | -         |
 | [BLOOMZ](https://huggingface.co/bigscience/bloomz)       | 560M/1.1B/1.7B/3B/7.1B/176B | query_key_value   | -         |
 | [Falcon](https://huggingface.co/tiiuae/falcon-7b)        | 7B/40B                      | query_key_value   | -         |
 | [Baichuan](https://github.com/baichuan-inc/baichuan-13B) | 7B/13B                      | W_pack            | baichuan  |
-| [Baichuan2](https://github.com/baichuan-inc/Baichuan2)   | 7B/13B                      | W_pack            | baichuan |
+| [Baichuan2](https://github.com/baichuan-inc/Baichuan2)   | 7B/13B                      | W_pack            | baichuan2 |
 | [InternLM](https://github.com/InternLM/InternLM)         | 7B                          | q_proj,v_proj     | intern    |
 | [Qwen](https://github.com/QwenLM/Qwen-7B)                | 7B                          | c_attn            | chatml    |
 | [XVERSE](https://github.com/xverse-ai/XVERSE-13B)        | 13B                         | q_proj,v_proj     | xverse    |
--- a/src/llmtuner/extras/constants.py
+++ b/src/llmtuner/extras/constants.py
@@ -78,7 +78,7 @@ DEFAULT_TEMPLATE = {
    "LLaMA2": "llama2",
    "ChineseLLaMA2": "llama2_zh",
    "Baichuan": "baichuan",
-    "Baichuan2": "baichuan",
+    "Baichuan2": "baichuan2",
    "InternLM": "intern",
    "Qwen": "chatml",
    "XVERSE": "xverse",
--- a/src/llmtuner/extras/template.py
+++ b/src/llmtuner/extras/template.py
@@ -516,6 +516,49 @@ register_template(
 )


+r"""
+Supports: https://huggingface.co/baichuan-inc/Baichuan2-7B-Chat
+          https://huggingface.co/baichuan-inc/Baichuan2-13B-Chat
+Used for training and inference of the fine-tuned models.
+"""
+register_template(
+    name="baichuan2",
+    prefix=[
+        "{{system}}"
+    ],
+    prompt=[
+        {"token": "<reserved_106>"}, # user token, placed before the query in the prompt
+        "{{query}}",
+        {"token": "<reserved_107>"} # assistant token, placed after the query in the prompt
+    ],
+    system="",
+    sep=[]
+)
+
+
+r"""
+Supports: https://huggingface.co/baichuan-inc/Baichuan2-7B-Chat
+          https://huggingface.co/baichuan-inc/Baichuan2-13B-Chat
+Used for inference of the original model.
+"""
+register_template(
+    name="baichuan2_eval",
+    prefix=[
+        "{{system}}",
+        {"token": "<reserved_106>"} # user token, here part of the prefix (after the system text) rather than the prompt
+    ],
+    prompt=[
+        "{{query}}",
+        {"token": "<reserved_107>"} # assistant token, placed after the query in the prompt
+    ],
+    system="",
+    sep=[],
+    stop_words=[
+        "<reserved_106>" # user token, also listed as a stop word for this template
+    ]
+)
+
+
 r"""
 Supports: https://huggingface.co/HuggingFaceH4/starchat-alpha
          https://huggingface.co/HuggingFaceH4/starchat-beta