Mirror of https://github.com/hiyouga/LLaMA-Factory.git (synced 2025-08-19 12:12:48 +08:00)
[data-loader] Allow dataset_dir to accept a dict for in-memory dataset_info (#8845)
This commit is contained in:
Parent: bc54ed8efb
Commit: 2572111616
@ -15,7 +15,7 @@
|
||||
import json
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Literal, Optional
|
||||
from typing import Any, Literal, Optional, Union
|
||||
|
||||
from huggingface_hub import hf_hub_download
|
||||
|
||||
@ -90,12 +90,14 @@ class DatasetAttr:
|
||||
self.set_attr(tag, attr["tags"])
|
||||
|
||||
|
||||
def get_dataset_list(dataset_names: Optional[list[str]], dataset_dir: str) -> list["DatasetAttr"]:
|
||||
def get_dataset_list(dataset_names: Optional[list[str]], dataset_dir: Union[str, dict]) -> list["DatasetAttr"]:
|
||||
r"""Get the attributes of the datasets."""
|
||||
if dataset_names is None:
|
||||
dataset_names = []
|
||||
|
||||
if dataset_dir == "ONLINE":
|
||||
if isinstance(dataset_dir, dict):
|
||||
dataset_info = dataset_dir
|
||||
elif dataset_dir == "ONLINE":
|
||||
dataset_info = None
|
||||
else:
|
||||
if dataset_dir.startswith("REMOTE:"):
|
||||
|
@ -16,7 +16,7 @@
|
||||
# limitations under the License.
|
||||
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from typing import Any, Literal, Optional
|
||||
from typing import Any, Literal, Optional, Union
|
||||
|
||||
|
||||
@dataclass
|
||||
@ -35,7 +35,7 @@ class DataArguments:
|
||||
default=None,
|
||||
metadata={"help": "The name of dataset(s) to use for evaluation. Use commas to separate multiple datasets."},
|
||||
)
|
||||
dataset_dir: str = field(
|
||||
dataset_dir: Union[str, dict] = field(
|
||||
default="data",
|
||||
metadata={"help": "Path to the folder containing the datasets."},
|
||||
)
|
||||
|
Loading…
x
Reference in New Issue
Block a user