From e87a17464fff9f90aa2b5f361c041382c7826dec Mon Sep 17 00:00:00 2001 From: hiyouga Date: Tue, 20 Feb 2024 20:44:24 +0800 Subject: [PATCH] fix #2516 Former-commit-id: 02c8c55ce36b242338219a9a9132edfc52302206 --- src/llmtuner/data/preprocess.py | 6 +++--- src/llmtuner/webui/locales.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/llmtuner/data/preprocess.py b/src/llmtuner/data/preprocess.py index 6f8bf7e8..8501d473 100644 --- a/src/llmtuner/data/preprocess.py +++ b/src/llmtuner/data/preprocess.py @@ -112,9 +112,9 @@ def preprocess_packed_supervised_dataset( input_ids += source_ids + target_ids labels += source_mask + target_ids - if template.efficient_eos: - input_ids += [tokenizer.eos_token_id] - labels += [tokenizer.eos_token_id] + if template.efficient_eos: + input_ids += [tokenizer.eos_token_id] + labels += [tokenizer.eos_token_id] total_length = len(input_ids) block_size = data_args.cutoff_len diff --git a/src/llmtuner/webui/locales.py b/src/llmtuner/webui/locales.py index a55794cd..af38aaef 100644 --- a/src/llmtuner/webui/locales.py +++ b/src/llmtuner/webui/locales.py @@ -477,7 +477,7 @@ LOCALES = { }, "zh": { "label": "序列打包", - "info": "在指令监督微调阶段将序列打包为相同长度的样本。", + "info": "在指令监督微调时将序列打包为等长样本。", }, }, "upcast_layernorm": {