Compare commits

..

2 Commits

Author SHA1 Message Date
Kingsley
d8e4482849
[model] add qwen3-4b-i/t (#8851) 2025-08-07 19:39:26 +08:00
kahlun
2572111616
[data-loader] Allow dataset_dir to accept a dict for in-memory dataset_info (#8845) 2025-08-07 16:26:59 +08:00
3 changed files with 15 additions and 5 deletions

View File

@@ -15,7 +15,7 @@
import json
import os
from dataclasses import dataclass
-from typing import Any, Literal, Optional
+from typing import Any, Literal, Optional, Union
from huggingface_hub import hf_hub_download
@@ -90,12 +90,14 @@ class DatasetAttr:
self.set_attr(tag, attr["tags"])
-def get_dataset_list(dataset_names: Optional[list[str]], dataset_dir: str) -> list["DatasetAttr"]:
+def get_dataset_list(dataset_names: Optional[list[str]], dataset_dir: Union[str, dict]) -> list["DatasetAttr"]:
r"""Get the attributes of the datasets."""
if dataset_names is None:
dataset_names = []
-if dataset_dir == "ONLINE":
+if isinstance(dataset_dir, dict):
+dataset_info = dataset_dir
+elif dataset_dir == "ONLINE":
dataset_info = None
else:
if dataset_dir.startswith("REMOTE:"):

View File

@@ -2755,6 +2755,14 @@ register_model_group(
DownloadSource.DEFAULT: "Qwen/Qwen3-4B",
DownloadSource.MODELSCOPE: "Qwen/Qwen3-4B",
},
+"Qwen3-4B-Instruct-2507": {
+DownloadSource.DEFAULT: "Qwen/Qwen3-4B-Instruct-2507",
+DownloadSource.MODELSCOPE: "Qwen/Qwen3-4B-Instruct-2507",
+},
+"Qwen3-4B-Thinking-2507": {
+DownloadSource.DEFAULT: "Qwen/Qwen3-4B-Thinking-2507",
+DownloadSource.MODELSCOPE: "Qwen/Qwen3-4B-Thinking-2507",
+},
"Qwen3-8B-Thinking": {
DownloadSource.DEFAULT: "Qwen/Qwen3-8B",
DownloadSource.MODELSCOPE: "Qwen/Qwen3-8B",

View File

@@ -16,7 +16,7 @@
# limitations under the License.
from dataclasses import asdict, dataclass, field
-from typing import Any, Literal, Optional
+from typing import Any, Literal, Optional, Union
@dataclass
@@ -35,7 +35,7 @@ class DataArguments:
default=None,
metadata={"help": "The name of dataset(s) to use for evaluation. Use commas to separate multiple datasets."},
)
-dataset_dir: str = field(
+dataset_dir: Union[str, dict] = field(
default="data",
metadata={"help": "Path to the folder containing the datasets."},
)