From e49f7f1afefab6d9b39edae48b2d92f0a7669bf8 Mon Sep 17 00:00:00 2001
From: hiyouga
Date: Thu, 28 Sep 2023 01:16:46 +0800
Subject: [PATCH] fix bug in packed sft dataset

Former-commit-id: 51d26b2af6612e65a91c576da5270028da27b322
---
 src/llmtuner/dsets/preprocess.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llmtuner/dsets/preprocess.py b/src/llmtuner/dsets/preprocess.py
index a062076d..c096ddc7 100644
--- a/src/llmtuner/dsets/preprocess.py
+++ b/src/llmtuner/dsets/preprocess.py
@@ -116,7 +116,7 @@ def preprocess_dataset(
         # split by chunks of cutoff_len
         for i in range(0, total_length, block_size):
             model_inputs["input_ids"].append(input_ids[i: i + block_size])
-            model_inputs["attention_mask"].append([1] * len(block_size))
+            model_inputs["attention_mask"].append([1] * block_size)
             model_inputs["labels"].append(labels[i: i + block_size])
 
         return model_inputs