mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-08-03 04:02:49 +08:00
remove exit in preprocess
Former-commit-id: 1a800f9993d28d80d4587a08c20f5a69722436b5
This commit is contained in:
parent
317d0855d2
commit
37b844d929
@ -13,7 +13,6 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
|
||||||
from typing import TYPE_CHECKING, Dict, Literal, Optional, Sequence, Union
|
from typing import TYPE_CHECKING, Dict, Literal, Optional, Sequence, Union
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@ -325,12 +324,10 @@ def get_dataset(
|
|||||||
)
|
)
|
||||||
|
|
||||||
dataset_dict = split_dataset(dataset, eval_dataset, data_args, seed=training_args.seed)
|
dataset_dict = split_dataset(dataset, eval_dataset, data_args, seed=training_args.seed)
|
||||||
if data_args.tokenized_path is not None: # save tokenized dataset to disk and exit
|
if data_args.tokenized_path is not None: # save tokenized dataset to disk
|
||||||
if training_args.should_save:
|
if training_args.should_save:
|
||||||
dataset_dict.save_to_disk(data_args.tokenized_path)
|
dataset_dict.save_to_disk(data_args.tokenized_path)
|
||||||
logger.info_rank0(f"Tokenized dataset is saved at {data_args.tokenized_path}.")
|
logger.info_rank0(f"Tokenized dataset is saved at {data_args.tokenized_path}.")
|
||||||
logger.info_rank0(f"Please restart the training with `tokenized_path: {data_args.tokenized_path}`.")
|
logger.info_rank0(f"Please launch the training with `tokenized_path: {data_args.tokenized_path}`.")
|
||||||
|
|
||||||
sys.exit(0)
|
|
||||||
|
|
||||||
return get_dataset_module(dataset_dict)
|
return get_dataset_module(dataset_dict)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user