From 8ecf606230605f3b38c21582293f4368c8c47bec Mon Sep 17 00:00:00 2001
From: hiyouga <467089858@qq.com>
Date: Tue, 4 Jun 2024 00:17:36 +0800
Subject: [PATCH] fix #3992

Former-commit-id: a18acf2abe28e37233bf8c8ed2600618ea3b62e9
---
 src/llamafactory/data/loader.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/llamafactory/data/loader.py b/src/llamafactory/data/loader.py
index f5929f15..4d0503c3 100644
--- a/src/llamafactory/data/loader.py
+++ b/src/llamafactory/data/loader.py
@@ -120,8 +120,8 @@ def load_single_dataset(
         logger.info("Sampled {} examples from dataset {}.".format(dataset_attr.num_samples, dataset_attr))
 
     if data_args.max_samples is not None:  # truncate dataset
-        indexes = np.random.permutation(len(dataset))[: data_args.max_samples]
-        dataset = dataset.select(indexes)
+        max_samples = min(data_args.max_samples, len(dataset))
+        dataset = dataset.select(range(max_samples))
 
     return align_dataset(dataset, dataset_attr, data_args)
 