From ed26bb3d825fec04a2f29952e97d34136ed6af0e Mon Sep 17 00:00:00 2001 From: "xingjun.wang" Date: Tue, 12 Dec 2023 13:02:54 +0800 Subject: [PATCH] update args for MsDataset.load Former-commit-id: c5f69357a167cbf99a93607177526e787419ea05 --- src/llmtuner/data/loader.py | 5 +++++ src/llmtuner/hparams/model_args.py | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/src/llmtuner/data/loader.py b/src/llmtuner/data/loader.py index 41c12422..7bd52caa 100644 --- a/src/llmtuner/data/loader.py +++ b/src/llmtuner/data/loader.py @@ -58,6 +58,11 @@ def get_dataset( dataset = MsDataset.load( dataset_name=data_path, subset_name=data_name, + split=data_args.split, + data_files=data_files, + cache_dir=model_args.cache_dir, + token=model_args.ms_hub_token, + streaming=(data_args.streaming and (dataset_attr.load_from != "file")), ).to_hf_dataset() else: dataset = load_dataset( diff --git a/src/llmtuner/hparams/model_args.py b/src/llmtuner/hparams/model_args.py index ebf6cafa..c5819cea 100644 --- a/src/llmtuner/hparams/model_args.py +++ b/src/llmtuner/hparams/model_args.py @@ -59,6 +59,10 @@ class ModelArguments: default=None, metadata={"help": "Auth token to log in with Hugging Face Hub."} ) + ms_hub_token: Optional[str] = field( + default=None, + metadata={"help": "Auth token to log in with ModelScope Hub."} + ) def __post_init__(self): self.compute_dtype = None