From 5de45bf9899e8d4f43f4308a1140586f7475828e Mon Sep 17 00:00:00 2001
From: hiyouga
Date: Thu, 16 Nov 2023 22:54:15 +0800
Subject: [PATCH] fix chatglm template

Former-commit-id: ed9f7705efbed0accf4dc5c9dfa9e3e7e15e1174
---
 src/llmtuner/data/template.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/llmtuner/data/template.py b/src/llmtuner/data/template.py
index 03b3c011..1bf24063 100644
--- a/src/llmtuner/data/template.py
+++ b/src/llmtuner/data/template.py
@@ -114,7 +114,7 @@ class Template:
             else:
                 prefix_ids = sep_ids + bos_ids
 
-            query_ids = self._convert_inputs_to_ids(tokenizer, context=self.prompt, query=query, idx=str(turn_idx))
+            query_ids = self._convert_inputs_to_ids(tokenizer, context=self.prompt, query=query, idx=str(turn_idx+1))
             resp_ids = self._convert_inputs_to_ids(tokenizer, context=[resp])
             encoded_pairs.append((prefix_ids + query_ids, resp_ids + eos_ids))
         return encoded_pairs
@@ -350,6 +350,8 @@ register_template(
     prefix=[
         {"token": "[gMASK]"},
         {"token": "sop"},
+        {"token": "<|system|>"},
+        "\n",
         "{{system}}"
     ],
     prompt=[
@@ -358,7 +360,10 @@ register_template(
         "{{query}}",
         {"token": "<|assistant|>"}
     ],
-    system="",
+    system=(
+        "You are ChatGLM3, a large language model trained by Zhipu.AI. "
+        "Follow the user's instructions carefully. Respond using markdown."
+    ),
     sep=[],
     stop_words=[
         "<|user|>",