Former-commit-id: 7404692808f2288d539668d364965ad104dacadb
This commit is contained in:
hiyouga 2024-02-03 23:45:31 +08:00
parent a2ae5bd867
commit 996cc5d900

View File

@@ -155,9 +155,6 @@ def get_dataset(
dataset = dataset.to_iterable_dataset() dataset = dataset.to_iterable_dataset()
return dataset return dataset
if data_args.streaming:
raise ValueError("Turn off dataset streaming to save cache files.")
with training_args.main_process_first(desc="load dataset"): with training_args.main_process_first(desc="load dataset"):
all_datasets = [] all_datasets = []
for dataset_attr in get_dataset_list(data_args): # TODO: add split for dataset_attr in get_dataset_list(data_args): # TODO: add split