Mirror of https://github.com/hiyouga/LLaMA-Factory.git, synced 2025-10-14 23:58:11 +08:00
fix bug in packed sft dataset
Former-commit-id: 51d26b2af6612e65a91c576da5270028da27b322
parent 21a454fa6c
commit e49f7f1afe
@@ -116,7 +116,7 @@ def preprocess_dataset(
         # split by chunks of cutoff_len
         for i in range(0, total_length, block_size):
             model_inputs["input_ids"].append(input_ids[i: i + block_size])
-            model_inputs["attention_mask"].append([1] * len(block_size))
+            model_inputs["attention_mask"].append([1] * block_size)
             model_inputs["labels"].append(labels[i: i + block_size])
 
         return model_inputs
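For context, here is a minimal runnable sketch of the packing step this hunk touches, and of why the old line was a bug: `block_size` is an `int`, so `[1] * len(block_size)` raises `TypeError: object of type 'int' has no len()`. The standalone `pack_into_blocks` wrapper and the `total_length` truncation below are assumptions for illustration, not LLaMA-Factory's actual API; only the loop body mirrors the diff.

from typing import Dict, List


def pack_into_blocks(
    input_ids: List[int], labels: List[int], block_size: int
) -> Dict[str, List[List[int]]]:
    """Split one concatenated token stream into fixed-size packed blocks."""
    model_inputs: Dict[str, List[List[int]]] = {
        "input_ids": [],
        "attention_mask": [],
        "labels": [],
    }
    # Assumption: like the surrounding code, drop the ragged tail so every
    # block holds exactly block_size tokens.
    total_length = (len(input_ids) // block_size) * block_size
    # split by chunks of cutoff_len
    for i in range(0, total_length, block_size):
        model_inputs["input_ids"].append(input_ids[i: i + block_size])
        # Fixed line: each full block is fully attended, so the mask is
        # simply block_size ones (the old `len(block_size)` crashed here).
        model_inputs["attention_mask"].append([1] * block_size)
        model_inputs["labels"].append(labels[i: i + block_size])
    return model_inputs


# e.g. a 10-token stream with block_size=4 yields two 4-token blocks:
# pack_into_blocks(list(range(10)), list(range(10)), 4)["input_ids"]
# -> [[0, 1, 2, 3], [4, 5, 6, 7]]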