mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-08-02 03:32:50 +08:00
remove exit in preprocess
Former-commit-id: 1a800f9993d28d80d4587a08c20f5a69722436b5
This commit is contained in:
parent
317d0855d2
commit
37b844d929
@@ -13,7 +13,6 @@
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
-import sys
|
||||
from typing import TYPE_CHECKING, Dict, Literal, Optional, Sequence, Union
|
||||
|
||||
import numpy as np
|
||||
@@ -325,12 +324,10 @@ def get_dataset(
|
||||
)
|
||||
|
||||
dataset_dict = split_dataset(dataset, eval_dataset, data_args, seed=training_args.seed)
|
||||
-if data_args.tokenized_path is not None: # save tokenized dataset to disk and exit
|
||||
+if data_args.tokenized_path is not None: # save tokenized dataset to disk
|
||||
if training_args.should_save:
|
||||
dataset_dict.save_to_disk(data_args.tokenized_path)
|
||||
logger.info_rank0(f"Tokenized dataset is saved at {data_args.tokenized_path}.")
|
||||
-logger.info_rank0(f"Please restart the training with `tokenized_path: {data_args.tokenized_path}`.")
|
||||
|
||||
-sys.exit(0)
|
||||
+logger.info_rank0(f"Please launch the training with `tokenized_path: {data_args.tokenized_path}`.")
|
||||
|
||||
return get_dataset_module(dataset_dict)
|
||||
|
Loading…
x
Reference in New Issue
Block a user