[data] fix template (#8827)

This commit is contained in:
Yaowei Zheng
2025-08-06 06:58:09 +08:00
committed by GitHub
parent 387454e524
commit 59fb5643e5
4 changed files with 8 additions and 4 deletions

View File

@@ -62,7 +62,7 @@ class SupervisedDatasetProcessor(DatasetProcessor):
if self.data_args.train_on_prompt:
source_label = source_ids
-elif self.template.efficient_eos:
+elif self.template.efficient_eos and turn_idx != 0:
source_label = [self.tokenizer.eos_token_id] + [IGNORE_INDEX] * (source_len - 1)
else:
source_label = [IGNORE_INDEX] * source_len

View File

@@ -1069,7 +1069,9 @@ register_template(
format_assistant=StringFormatter(slots=["{{content}}<|end|>"]),
format_system=StringFormatter(slots=["<|start|>system<|message|>{{content}}<|end|>"]),
default_system="You are ChatGPT, a large language model trained by OpenAI.",
thought_words=("<|channel|>analysis<|message|>", "<|end|><|start|>assistant<|channel|>final<|message|>"),
efficient_eos=True,
template_class=ReasoningTemplate,
)