diff --git a/src/llmtuner/data/template.py b/src/llmtuner/data/template.py index f4537d86..4243807b 100644 --- a/src/llmtuner/data/template.py +++ b/src/llmtuner/data/template.py @@ -286,6 +286,9 @@ register_template( format_user=StringFormatter(container=[ "Human: {{content}}###Assistant:" ]), + format_assistant=StringFormatter(container=[ + "{{content}}" + ]), system=( "A chat between a curious human and an artificial intelligence assistant. " "The assistant gives helpful, detailed, and polite answers to the human's questions." @@ -307,6 +310,9 @@ register_template( "{{content}}", {"token": ""} ]), + format_assistant=StringFormatter(container=[ + "{{content}}" + ]), efficient_eos=True ) @@ -318,6 +324,9 @@ register_template( "{{content}}", {"token": ""} ]), + format_assistant=StringFormatter(container=[ + "{{content}}" + ]), efficient_eos=True ) @@ -348,6 +357,9 @@ register_template( format_user=StringFormatter(container=[ "[Round {{idx}}]\n\n问:{{content}}\n\n答:" ]), + format_assistant=StringFormatter(container=[ + "{{content}}" + ]), format_system=StringFormatter(container=[ {"token": "[gMASK]"}, {"token": "sop"}, @@ -422,6 +434,9 @@ register_template( format_user=StringFormatter(container=[ "### Instruction:\n{{content}}\n### Response:\n" ]), + format_assistant=StringFormatter(container=[ + "{{content}}" + ]), system=( "You are an AI programming assistant, utilizing the Deepseek Coder model, " "developed by Deepseek Company, and you only answer questions related to computer science. " @@ -460,6 +475,9 @@ register_template( format_user=StringFormatter(container=[ "User: {{content}}\nFalcon:" ]), + format_assistant=StringFormatter(container=[ + "{{content}}" + ]), separator=[ "\n" ], @@ -474,6 +492,9 @@ register_template( {"token": ""}, "\n<|Bot|>:" ]), + format_assistant=StringFormatter(container=[ + "{{content}}" + ]), separator=[ {"token": ""}, "\n" @@ -495,6 +516,9 @@ register_template( {"token": "[UNUSED_TOKEN_146]"}, "assistant\n" ]), + format_assistant=StringFormatter(container=[ + "{{content}}" + ]), format_system=StringFormatter(container=[ {"token": "[UNUSED_TOKEN_146]"}, "system\n{{content}}", @@ -557,6 +581,9 @@ register_template( {"token": "<|end_of_turn|>"}, "GPT4 Correct Assistant:" ]), + format_assistant=StringFormatter(container=[ + "{{content}}" + ]), separator=[ {"token": "<|end_of_turn|>"} ], @@ -603,6 +630,9 @@ register_template( "\n", {"token": "<|assistant|>"} ]), + format_assistant=StringFormatter(container=[ + "{{content}}" + ]), format_system=StringFormatter(container=[ {"token": "<|system|>"}, "\n{{content}}",