mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-08-22 22:02:51 +08:00
Merge pull request #7244 from hiyouga/hiyouga/token
[data] avoid exit after saving preprocessed data
Former-commit-id: dcbf01b0035062fa14187e5bdbb925080d349501
This commit is contained in:
commit
1c634d9c53
@@ -13,7 +13,6 @@
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import sys
|
||||
from typing import TYPE_CHECKING, Dict, Literal, Optional, Sequence, Union
|
||||
|
||||
import numpy as np
|
||||
@@ -325,12 +324,10 @@ def get_dataset(
|
||||
)
|
||||
|
||||
dataset_dict = split_dataset(dataset, eval_dataset, data_args, seed=training_args.seed)
|
||||
if data_args.tokenized_path is not None: # save tokenized dataset to disk and exit
|
||||
if data_args.tokenized_path is not None: # save tokenized dataset to disk
|
||||
if training_args.should_save:
|
||||
dataset_dict.save_to_disk(data_args.tokenized_path)
|
||||
logger.info_rank0(f"Tokenized dataset is saved at {data_args.tokenized_path}.")
|
||||
logger.info_rank0(f"Please restart the training with `tokenized_path: {data_args.tokenized_path}`.")
|
||||
|
||||
sys.exit(0)
|
||||
logger.info_rank0(f"Please launch the training with `tokenized_path: {data_args.tokenized_path}`.")
|
||||
|
||||
return get_dataset_module(dataset_dict)
|
||||
|
Loading…
x
Reference in New Issue
Block a user