mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-12-15 03:10:35 +08:00
support val set in streaming mode
This commit is contained in:
@@ -104,9 +104,9 @@ def preprocess_dataset(
|
||||
if len(source_ids) > data_args.max_source_length:
|
||||
source_ids = source_ids[:data_args.max_source_length]
|
||||
if len(accept_ids) > data_args.max_target_length:
|
||||
accept_ids = accept_ids[:data_args.max_target_length - 1]
|
||||
accept_ids = accept_ids[:data_args.max_target_length]
|
||||
if len(reject_ids) > data_args.max_target_length:
|
||||
reject_ids = reject_ids[:data_args.max_target_length - 1]
|
||||
reject_ids = reject_ids[:data_args.max_target_length]
|
||||
|
||||
accept_ids = source_ids + accept_ids
|
||||
reject_ids = source_ids + reject_ids
|
||||
@@ -166,8 +166,5 @@ def preprocess_dataset(
|
||||
**kwargs
|
||||
)
|
||||
|
||||
if data_args.streaming:
|
||||
dataset = dataset.shuffle(buffer_size=data_args.buffer_size)
|
||||
|
||||
print_function(next(iter(dataset)))
|
||||
return dataset
|
||||
|
||||
Reference in New Issue
Block a user