diff --git a/src/llmtuner/data/template.py b/src/llmtuner/data/template.py index 03b3c011..1bf24063 100644 --- a/src/llmtuner/data/template.py +++ b/src/llmtuner/data/template.py @@ -114,7 +114,7 @@ class Template: else: prefix_ids = sep_ids + bos_ids - query_ids = self._convert_inputs_to_ids(tokenizer, context=self.prompt, query=query, idx=str(turn_idx)) + query_ids = self._convert_inputs_to_ids(tokenizer, context=self.prompt, query=query, idx=str(turn_idx+1)) resp_ids = self._convert_inputs_to_ids(tokenizer, context=[resp]) encoded_pairs.append((prefix_ids + query_ids, resp_ids + eos_ids)) return encoded_pairs @@ -350,6 +350,8 @@ register_template( prefix=[ {"token": "[gMASK]"}, {"token": "sop"}, + {"token": "<|system|>"}, + "\n", "{{system}}" ], prompt=[ @@ -358,7 +360,10 @@ register_template( "{{query}}", {"token": "<|assistant|>"} ], - system="", + system=( + "You are ChatGLM3, a large language model trained by Zhipu.AI. " + "Follow the user's instructions carefully. Respond using markdown." + ), sep=[], stop_words=[ "<|user|>",