diff --git a/src/llmtuner/data/preprocess.py b/src/llmtuner/data/preprocess.py index 6f8bf7e8..8501d473 100644 --- a/src/llmtuner/data/preprocess.py +++ b/src/llmtuner/data/preprocess.py @@ -112,9 +112,9 @@ def preprocess_packed_supervised_dataset( input_ids += source_ids + target_ids labels += source_mask + target_ids - if template.efficient_eos: - input_ids += [tokenizer.eos_token_id] - labels += [tokenizer.eos_token_id] + if template.efficient_eos: + input_ids += [tokenizer.eos_token_id] + labels += [tokenizer.eos_token_id] total_length = len(input_ids) block_size = data_args.cutoff_len diff --git a/src/llmtuner/webui/locales.py b/src/llmtuner/webui/locales.py index a55794cd..af38aaef 100644 --- a/src/llmtuner/webui/locales.py +++ b/src/llmtuner/webui/locales.py @@ -477,7 +477,7 @@ LOCALES = { }, "zh": { "label": "序列打包", - "info": "在指令监督微调阶段将序列打包为相同长度的样本。", + "info": "在指令监督微调时将序列打包为等长样本。", }, }, "upcast_layernorm": {