Mirror of https://github.com/hiyouga/LLaMA-Factory.git (synced 2025-08-19 12:12:48 +08:00)
[data-loader] Allow dataset_dir to accept a dict for in-memory dataset_info (#8845)
This commit is contained in:
parent bc54ed8efb
commit 2572111616
@ -15,7 +15,7 @@
|
|||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Any, Literal, Optional
|
from typing import Any, Literal, Optional, Union
|
||||||
|
|
||||||
from huggingface_hub import hf_hub_download
|
from huggingface_hub import hf_hub_download
|
||||||
|
|
||||||
@ -90,12 +90,14 @@ class DatasetAttr:
|
|||||||
self.set_attr(tag, attr["tags"])
|
self.set_attr(tag, attr["tags"])
|
||||||
|
|
||||||
|
|
||||||
def get_dataset_list(dataset_names: Optional[list[str]], dataset_dir: str) -> list["DatasetAttr"]:
|
def get_dataset_list(dataset_names: Optional[list[str]], dataset_dir: Union[str, dict]) -> list["DatasetAttr"]:
|
||||||
r"""Get the attributes of the datasets."""
|
r"""Get the attributes of the datasets."""
|
||||||
if dataset_names is None:
|
if dataset_names is None:
|
||||||
dataset_names = []
|
dataset_names = []
|
||||||
|
|
||||||
if dataset_dir == "ONLINE":
|
if isinstance(dataset_dir, dict):
|
||||||
|
dataset_info = dataset_dir
|
||||||
|
elif dataset_dir == "ONLINE":
|
||||||
dataset_info = None
|
dataset_info = None
|
||||||
else:
|
else:
|
||||||
if dataset_dir.startswith("REMOTE:"):
|
if dataset_dir.startswith("REMOTE:"):
|
||||||
|
@ -16,7 +16,7 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
from dataclasses import asdict, dataclass, field
|
from dataclasses import asdict, dataclass, field
|
||||||
from typing import Any, Literal, Optional
|
from typing import Any, Literal, Optional, Union
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@ -35,7 +35,7 @@ class DataArguments:
|
|||||||
default=None,
|
default=None,
|
||||||
metadata={"help": "The name of dataset(s) to use for evaluation. Use commas to separate multiple datasets."},
|
metadata={"help": "The name of dataset(s) to use for evaluation. Use commas to separate multiple datasets."},
|
||||||
)
|
)
|
||||||
dataset_dir: str = field(
|
dataset_dir: Union[str, dict] = field(
|
||||||
default="data",
|
default="data",
|
||||||
metadata={"help": "Path to the folder containing the datasets."},
|
metadata={"help": "Path to the folder containing the datasets."},
|
||||||
)
|
)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user