From 77cb18e9e356117e1b414a79238251f2aeef0dfc Mon Sep 17 00:00:00 2001 From: hiyouga Date: Thu, 16 Nov 2023 22:54:15 +0800 Subject: [PATCH] fix chatglm template Former-commit-id: 6a4b79c2e0610a17012bf3e72a2b5e8bac060092 --- src/llmtuner/data/template.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/llmtuner/data/template.py b/src/llmtuner/data/template.py index 03b3c011..1bf24063 100644 --- a/src/llmtuner/data/template.py +++ b/src/llmtuner/data/template.py @@ -114,7 +114,7 @@ class Template: else: prefix_ids = sep_ids + bos_ids - query_ids = self._convert_inputs_to_ids(tokenizer, context=self.prompt, query=query, idx=str(turn_idx)) + query_ids = self._convert_inputs_to_ids(tokenizer, context=self.prompt, query=query, idx=str(turn_idx+1)) resp_ids = self._convert_inputs_to_ids(tokenizer, context=[resp]) encoded_pairs.append((prefix_ids + query_ids, resp_ids + eos_ids)) return encoded_pairs @@ -350,6 +350,8 @@ register_template( prefix=[ {"token": "[gMASK]"}, {"token": "sop"}, + {"token": "<|system|>"}, + "\n", "{{system}}" ], prompt=[ @@ -358,7 +360,10 @@ register_template( "{{query}}", {"token": "<|assistant|>"} ], - system="", + system=( + "You are ChatGLM3, a large language model trained by Zhipu.AI. " + "Follow the user's instructions carefully. Respond using markdown." + ), sep=[], stop_words=[ "<|user|>",