mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-10-15 08:08:09 +08:00
fix sft encode
Former-commit-id: 2369a96a3200593421ae9afb06e08e2ac8010bb2
This commit is contained in:
parent
cc290a41e6
commit
8de7a01887
@@ -505,7 +505,7 @@ def preprocess_data(
|
||||
input_ids, labels = [], []
|
||||
|
||||
for i in range(len(dialog) // 2):
|
||||
-source_ids = tokenizer.encode(text=dialog[2*i], add_special_tokens=True)
|
||||
+source_ids = tokenizer.encode(text=dialog[2*i], add_special_tokens=(i == 0))
|
||||
target_ids = tokenizer.encode(text=dialog[2*i+1], add_special_tokens=False)
|
||||
|
||||
if len(source_ids) > data_args.max_source_length:
|
||||
|
Loading…
x
Reference in New Issue
Block a user