fix chatglm template

Former-commit-id: ed9f7705efbed0accf4dc5c9dfa9e3e7e15e1174
This commit is contained in:
hiyouga 2023-11-16 22:54:15 +08:00
parent 4a6cb33f0c
commit 5de45bf989

View File

@@ -114,7 +114,7 @@ class Template:
 else:
 prefix_ids = sep_ids + bos_ids
-query_ids = self._convert_inputs_to_ids(tokenizer, context=self.prompt, query=query, idx=str(turn_idx))
+query_ids = self._convert_inputs_to_ids(tokenizer, context=self.prompt, query=query, idx=str(turn_idx+1))
 resp_ids = self._convert_inputs_to_ids(tokenizer, context=[resp])
 encoded_pairs.append((prefix_ids + query_ids, resp_ids + eos_ids))
 return encoded_pairs
@@ -350,6 +350,8 @@ register_template(
 prefix=[
 {"token": "[gMASK]"},
 {"token": "sop"},
+{"token": "<|system|>"},
+"\n",
 "{{system}}"
 ],
 prompt=[
@@ -358,7 +360,10 @@ register_template(
 "{{query}}",
 {"token": "<|assistant|>"}
 ],
-system="",
+system=(
+"You are ChatGLM3, a large language model trained by Zhipu.AI. "
+"Follow the user's instructions carefully. Respond using markdown."
+),
 sep=[],
 stop_words=[
 "<|user|>",