feat: add a batch size option for the map function used in dataset preprocessing

This commit is contained in:
naem1023
2024-09-02 13:52:47 +09:00
parent 99fd9637bd
commit 209313eeea
2 changed files with 7 additions and 0 deletions

View File

@@ -109,6 +109,10 @@ class DataArguments:
default=None,
metadata={"help": "Path to save or load the tokenized datasets."},
)
# Optional batch size for dataset mapping; defaults to None (no value set).
# NOTE(review): presumably forwarded as the batch size of the dataset `map`
# call during preprocessing — the consumer is not visible here; confirm
# against the caller, including what None means there.
dataset_map_batch_size: Optional[int] = field(
default=None,
metadata={"help": "Batch size for dataset mapping."},
)
def __post_init__(self):
def split_arg(arg):