From 896dbfec165c6dd4a399ba0a401d1394d689288c Mon Sep 17 00:00:00 2001 From: hiyouga Date: Thu, 1 Jun 2023 23:54:44 +0800 Subject: [PATCH] fix possible OOM error Former-commit-id: 0d590dffb41b0e832d9f87d20a23bcd0acd983aa --- src/utils/common.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/utils/common.py b/src/utils/common.py index 9137e54f..26523286 100644 --- a/src/utils/common.py +++ b/src/utils/common.py @@ -477,13 +477,13 @@ def preprocess_data( desc="Running tokenizer on dataset" ) - if stage == "pt": - print_unsupervised_dataset_example(dataset[0]) - elif stage == "sft": - print_supervised_dataset_example(dataset[0]) - elif stage == "rm": - print_pairwise_dataset_example(dataset[0]) - elif stage == "ppo": - print_unsupervised_dataset_example(dataset[0]) + if stage == "pt": + print_unsupervised_dataset_example(dataset[0]) + elif stage == "sft": + print_supervised_dataset_example(dataset[0]) + elif stage == "rm": + print_pairwise_dataset_example(dataset[0]) + elif stage == "ppo": + print_unsupervised_dataset_example(dataset[0]) - return dataset + return dataset