mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-10-15 08:08:09 +08:00
[data] fix shared file system (#8179)
This commit is contained in:
parent
d4a413eb37
commit
2bf8e993ab
@@ -300,7 +300,7 @@ def get_dataset(
|
|||||||
raise ValueError("Turn off `streaming` when saving dataset to disk.")
|
raise ValueError("Turn off `streaming` when saving dataset to disk.")
|
||||||
|
|
||||||
# Load and preprocess dataset
|
# Load and preprocess dataset
|
||||||
with training_args.main_process_first(desc="load dataset"):
|
with training_args.main_process_first(desc="load dataset", local=(not data_args.data_shared_file_system)):
|
||||||
dataset = _get_merged_dataset(data_args.dataset, model_args, data_args, training_args, stage)
|
dataset = _get_merged_dataset(data_args.dataset, model_args, data_args, training_args, stage)
|
||||||
eval_dataset = _get_merged_dataset(
|
eval_dataset = _get_merged_dataset(
|
||||||
data_args.eval_dataset,
|
data_args.eval_dataset,
|
||||||
@@ -311,7 +311,7 @@ def get_dataset(
|
|||||||
return_dict=data_args.eval_on_each_dataset,
|
return_dict=data_args.eval_on_each_dataset,
|
||||||
)
|
)
|
||||||
|
|
||||||
with training_args.main_process_first(desc="pre-process dataset"):
|
with training_args.main_process_first(desc="pre-process dataset", local=(not data_args.data_shared_file_system)):
|
||||||
dataset = _get_preprocessed_dataset(
|
dataset = _get_preprocessed_dataset(
|
||||||
dataset, data_args, training_args, stage, template, tokenizer, processor, is_eval=False
|
dataset, data_args, training_args, stage, template, tokenizer, processor, is_eval=False
|
||||||
)
|
)
|
||||||
|
@@ -133,6 +133,10 @@ class DataArguments:
|
|||||||
)
|
)
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
data_shared_file_system: bool = field(
|
||||||
|
default=False,
|
||||||
|
metadata={"help": "Whether or not to use a shared file system for the datasets."},
|
||||||
|
)
|
||||||
|
|
||||||
def __post_init__(self):
|
def __post_init__(self):
|
||||||
def split_arg(arg):
|
def split_arg(arg):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user