Former-commit-id: a18acf2abe28e37233bf8c8ed2600618ea3b62e9
This commit is contained in:
hiyouga 2024-06-04 00:17:36 +08:00
parent b12d4beb8a
commit 8ecf606230

View File

@@ -120,8 +120,8 @@ def load_single_dataset(
logger.info("Sampled {} examples from dataset {}.".format(dataset_attr.num_samples, dataset_attr))
if data_args.max_samples is not None: # truncate dataset
-indexes = np.random.permutation(len(dataset))[: data_args.max_samples]
-dataset = dataset.select(indexes)
+max_samples = min(data_args.max_samples, len(dataset))
+dataset = dataset.select(range(max_samples))
return align_dataset(dataset, dataset_attr, data_args)