Former-commit-id: 0d332ca8d0987c0331361934ab110fafa6402a7e
This commit is contained in:
hoshi-hiyouga 2024-09-04 19:10:30 +08:00 committed by GitHub
parent 41a9c415e1
commit 372b71c847

View File

@@ -223,13 +223,14 @@ def get_dataset(
dataset_module: Dict[str, "Dataset"] = {}
if "train" in dataset_dict:
dataset_module["train_dataset"] = dataset_dict["train"]
if "validation" in dataset_dict:
dataset_module["eval_dataset"] = dataset_dict["validation"]
if data_args.streaming:
dataset_module = {k: v.to_iterable_dataset() for k, v in dataset_module.items()}
return dataset_module
return dataset_module, template
if data_args.streaming:
raise ValueError("Turn off `streaming` when saving dataset to disk.")