Update arguments for MsDataset.load

Former-commit-id: 09533e95edc5fa65a38b2f04c6d88506196021b3
This commit is contained in:
xingjun.wang 2023-12-12 13:02:54 +08:00
parent 9f17d36ccf
commit 879209829e
3 changed files with 9 additions and 1 deletion

View File

@@ -129,7 +129,6 @@
}, },
"firefly": { "firefly": {
"hf_hub_url": "YeungNLP/firefly-train-1.1M", "hf_hub_url": "YeungNLP/firefly-train-1.1M",
"ms_hub_url": "AI-ModelScope/firefly-train-1.1M",
"columns": { "columns": {
"prompt": "input", "prompt": "input",
"response": "target" "response": "target"

View File

@@ -58,6 +58,11 @@ def get_dataset(
dataset = MsDataset.load( dataset = MsDataset.load(
dataset_name=data_path, dataset_name=data_path,
subset_name=data_name, subset_name=data_name,
split=data_args.split,
data_files=data_files,
cache_dir=model_args.cache_dir,
token=model_args.ms_hub_token,
streaming=(data_args.streaming and (dataset_attr.load_from != "file")),
).to_hf_dataset() ).to_hf_dataset()
else: else:
dataset = load_dataset( dataset = load_dataset(

View File

@@ -59,6 +59,10 @@ class ModelArguments:
default=None, default=None,
metadata={"help": "Auth token to log in with Hugging Face Hub."} metadata={"help": "Auth token to log in with Hugging Face Hub."}
) )
ms_hub_token: Optional[str] = field(
default=None,
metadata={"help": "Auth token to log in with ModelScope Hub."}
)
def __post_init__(self): def __post_init__(self):
self.compute_dtype = None self.compute_dtype = None