From 26e897e861464de58fd0ffd01e56992a5b7da130 Mon Sep 17 00:00:00 2001 From: huniu20 Date: Wed, 9 Oct 2024 17:21:37 +0800 Subject: [PATCH 1/8] 1. add modelers hub support Former-commit-id: 24ebe187e360753666b768685a0dcc78054bb702 --- README.md | 20 ++++++++++++++++++-- README_zh.md | 20 ++++++++++++++++++-- src/llamafactory/__init__.py | 1 + src/llamafactory/data/loader.py | 21 +++++++++++++++++++-- src/llamafactory/data/parser.py | 18 +++++++++++++----- src/llamafactory/extras/misc.py | 29 +++++++++++++++++++++-------- src/llamafactory/model/loader.py | 4 ++-- 7 files changed, 92 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 3359c10d..40253b16 100644 --- a/README.md +++ b/README.md @@ -75,6 +75,8 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ ## Changelog +[24/10/09] We supported downloading pre-trained models and datasets from the **[Modelers Hub](https://modelers.cn/models)** for Chinese mainland users. See [this tutorial](#download-from-modelers-hub) for usage. + [24/09/19] We support fine-tuning the **[Qwen2.5](https://qwenlm.github.io/blog/qwen2.5/)** models. [24/08/30] We support fine-tuning the **[Qwen2-VL](https://qwenlm.github.io/blog/qwen2-vl/)** models. Thank [@simonJJJ](https://github.com/simonJJJ)'s PR. @@ -363,7 +365,7 @@ cd LLaMA-Factory pip install -e ".[torch,metrics]" ``` -Extra dependencies available: torch, torch-npu, metrics, deepspeed, liger-kernel, bitsandbytes, hqq, eetq, gptq, awq, aqlm, vllm, galore, badam, adam-mini, qwen, modelscope, quality +Extra dependencies available: torch, torch-npu, metrics, deepspeed, liger-kernel, bitsandbytes, hqq, eetq, gptq, awq, aqlm, vllm, galore, badam, adam-mini, qwen, modelscope, quality, openmind > [!TIP] > Use `pip install --no-deps -e .` to resolve package conflicts. @@ -415,7 +417,7 @@ Download the pre-built Docker images: [32GB](http://mirrors.cn-central-221.ovaij ### Data Preparation -Please refer to [data/README.md](data/README.md) for checking the details about the format of dataset files. You can either use datasets on HuggingFace / ModelScope hub or load the dataset in local disk. +Please refer to [data/README.md](data/README.md) for checking the details about the format of dataset files. You can use datasets on HuggingFace hub, ModelScope hub, modelers hub or load the dataset in local disk. > [!NOTE] > Please update `data/dataset_info.json` to use your custom dataset. @@ -483,6 +485,7 @@ docker build -f ./docker/docker-cuda/Dockerfile \ docker run -dit --gpus=all \ -v ./hf_cache:/root/.cache/huggingface \ -v ./ms_cache:/root/.cache/modelscope \ + -v ./om_cache:/root/.cache/openmind \ -v ./data:/app/data \ -v ./output:/app/output \ -p 7860:7860 \ @@ -507,6 +510,7 @@ docker build -f ./docker/docker-npu/Dockerfile \ docker run -dit \ -v ./hf_cache:/root/.cache/huggingface \ -v ./ms_cache:/root/.cache/modelscope \ + -v ./om_cache:/root/.cache/openmind \ -v ./data:/app/data \ -v ./output:/app/output \ -v /usr/local/dcmi:/usr/local/dcmi \ @@ -540,6 +544,7 @@ docker build -f ./docker/docker-rocm/Dockerfile \ docker run -dit \ -v ./hf_cache:/root/.cache/huggingface \ -v ./ms_cache:/root/.cache/modelscope \ + -v ./om_cache:/root/.cache/openmind \ -v ./data:/app/data \ -v ./output:/app/output \ -v ./saves:/app/saves \ @@ -560,6 +565,7 @@ docker exec -it llamafactory bash - `hf_cache`: Utilize Hugging Face cache on the host machine. Reassignable if a cache already exists in a different directory. - `ms_cache`: Similar to Hugging Face cache but for ModelScope users. 
+- `om_cache`: Similar to Hugging Face cache but for Modelers users. - `data`: Place datasets on this dir of the host machine so that they can be selected on LLaMA Board GUI. - `output`: Set export dir to this location so that the merged result can be accessed directly on the host machine. @@ -584,6 +590,16 @@ export USE_MODELSCOPE_HUB=1 # `set USE_MODELSCOPE_HUB=1` for Windows Train the model by specifying a model ID of the ModelScope Hub as the `model_name_or_path`. You can find a full list of model IDs at [ModelScope Hub](https://modelscope.cn/models), e.g., `LLM-Research/Meta-Llama-3-8B-Instruct`. +### Download from Modelers Hub + +You can also use Modelers Hub to download models and datasets. + +```bash +export USE_OPENMIND_HUB=1 # `set USE_OPENMIND_HUB=1` for Windows +``` + +Train the model by specifying a model ID of the Modelers Hub as the `model_name_or_path`. You can find a full list of model IDs at [Modelers Hub](https://modelers.cn/models), e.g., `TeleAI/TeleChat-7B-pt`. + ### Use W&B Logger To use [Weights & Biases](https://wandb.ai) for logging experimental results, you need to add the following arguments to yaml files. diff --git a/README_zh.md b/README_zh.md index 20e86853..61aa860c 100644 --- a/README_zh.md +++ b/README_zh.md @@ -75,6 +75,7 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272 ## 更新日志 +[24/10/09] 我们支持了从 **[魔乐社区](https://modelers.cn/models)** 下载预训练模型和数据集。详细用法请参照 [此教程](#从魔乐社区下载)。 [24/09/19] 我们支持了 **[Qwen2.5](https://qwenlm.github.io/blog/qwen2.5/)** 模型的微调。 @@ -364,7 +365,7 @@ cd LLaMA-Factory pip install -e ".[torch,metrics]" ``` -可选的额外依赖项:torch、torch-npu、metrics、deepspeed、liger-kernel、bitsandbytes、hqq、eetq、gptq、awq、aqlm、vllm、galore、badam、adam-mini、qwen、modelscope、quality +可选的额外依赖项:torch、torch-npu、metrics、deepspeed、liger-kernel、bitsandbytes、hqq、eetq、gptq、awq、aqlm、vllm、galore、badam、adam-mini、qwen、modelscope、quality、openmind > [!TIP] > 遇到包冲突时,可使用 `pip install --no-deps -e .` 解决。 @@ -416,7 +417,7 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh ### 数据准备 -关于数据集文件的格式,请参考 [data/README_zh.md](data/README_zh.md) 的内容。你可以使用 HuggingFace / ModelScope 上的数据集或加载本地数据集。 +关于数据集文件的格式,请参考 [data/README_zh.md](data/README_zh.md) 的内容。你可以使用 HuggingFace , ModelScope 或者 Modelers 上的数据集或加载本地数据集。 > [!NOTE] > 使用自定义数据集时,请更新 `data/dataset_info.json` 文件。 @@ -484,6 +485,7 @@ docker build -f ./docker/docker-cuda/Dockerfile \ docker run -dit --gpus=all \ -v ./hf_cache:/root/.cache/huggingface \ -v ./ms_cache:/root/.cache/modelscope \ + -v ./om_cache:/root/.cache/openmind \ -v ./data:/app/data \ -v ./output:/app/output \ -p 7860:7860 \ @@ -508,6 +510,7 @@ docker build -f ./docker/docker-npu/Dockerfile \ docker run -dit \ -v ./hf_cache:/root/.cache/huggingface \ -v ./ms_cache:/root/.cache/modelscope \ + -v ./om_cache:/root/.cache/openmind \ -v ./data:/app/data \ -v ./output:/app/output \ -v /usr/local/dcmi:/usr/local/dcmi \ @@ -541,6 +544,7 @@ docker build -f ./docker/docker-rocm/Dockerfile \ docker run -dit \ -v ./hf_cache:/root/.cache/huggingface \ -v ./ms_cache:/root/.cache/modelscope \ + -v ./om_cache:/root/.cache/openmind \ -v ./data:/app/data \ -v ./output:/app/output \ -v ./saves:/app/saves \ @@ -561,6 +565,7 @@ docker exec -it llamafactory bash - `hf_cache`:使用宿主机的 Hugging Face 缓存文件夹,允许更改为新的目录。 - `ms_cache`:类似 Hugging Face 缓存文件夹,为 ModelScope 用户提供。 +- `om_cache`:类似 Hugging Face 缓存文件夹,为 Modelers 用户提供。 - `data`:宿主机中存放数据集的文件夹路径。 - `output`:将导出目录设置为该路径后,即可在宿主机中访问导出后的模型。 @@ -585,6 +590,17 @@ export USE_MODELSCOPE_HUB=1 # Windows 使用 `set USE_MODELSCOPE_HUB=1` 将 
`model_name_or_path` 设置为模型 ID 来加载对应的模型。在[魔搭社区](https://modelscope.cn/models)查看所有可用的模型,例如 `LLM-Research/Meta-Llama-3-8B-Instruct`。 +### 从魔乐社区下载 + +您也可以通过下述方法使用魔乐社区,在魔乐社区上下载数据集和模型。 + +```bash +export USE_OPENMIND_HUB=1 # Windows 使用 `set USE_OPENMIND_HUB=1` +``` + +将 `model_name_or_path` 设置为模型 ID 来加载对应的模型。在[魔乐社区](https://modelers.cn/models)查看所有可用的模型,例如 `TeleAI/TeleChat-7B-pt`。 + + ### 使用 W&B 面板 若要使用 [Weights & Biases](https://wandb.ai) 记录实验数据,请在 yaml 文件中添加下面的参数。 diff --git a/src/llamafactory/__init__.py b/src/llamafactory/__init__.py index ffc8c9ad..dd5d1a01 100644 --- a/src/llamafactory/__init__.py +++ b/src/llamafactory/__init__.py @@ -38,6 +38,7 @@ Force check imports: FORCE_CHECK_IMPORTS=1 Force using torchrun: FORCE_TORCHRUN=1 Set logging verbosity: LLAMAFACTORY_VERBOSITY=WARN Use modelscope: USE_MODELSCOPE_HUB=1 +Use openmind: USE_OPENMIND_HUB=1 """ from .extras.env import VERSION diff --git a/src/llamafactory/data/loader.py b/src/llamafactory/data/loader.py index 362d57e9..03a38fa8 100644 --- a/src/llamafactory/data/loader.py +++ b/src/llamafactory/data/loader.py @@ -53,7 +53,7 @@ def _load_single_dataset( """ logger.info("Loading dataset {}...".format(dataset_attr)) data_path, data_name, data_dir, data_files = None, None, None, None - if dataset_attr.load_from in ["hf_hub", "ms_hub"]: + if dataset_attr.load_from in ["om_hub", "hf_hub", "ms_hub"]: data_path = dataset_attr.dataset_name data_name = dataset_attr.subset data_dir = dataset_attr.folder @@ -84,7 +84,24 @@ def _load_single_dataset( else: raise NotImplementedError("Unknown load type: {}.".format(dataset_attr.load_from)) - if dataset_attr.load_from == "ms_hub": + if dataset_attr.load_from == "om_hub": + try: + from openmind import OmDataset + from openmind.utils.hub import OM_DATASETS_CACHE + cache_dir = model_args.cache_dir or OM_DATASETS_CACHE + dataset = OmDataset.load_dataset( + path=data_path, + name=data_name, + data_dir=data_dir, + data_files=data_files, + split=data_args.split, + cache_dir=cache_dir, + token=model_args.ms_hub_token, + streaming=(data_args.streaming and (dataset_attr.load_from != "file")), + ) + except ImportError: + raise ImportError("Please install openmind via `pip install openmind -U`") + elif dataset_attr.load_from == "ms_hub": require_version("modelscope>=1.11.0", "To fix: pip install modelscope>=1.11.0") from modelscope import MsDataset from modelscope.utils.config_ds import MS_DATASETS_CACHE diff --git a/src/llamafactory/data/parser.py b/src/llamafactory/data/parser.py index 15a6eab8..696cd488 100644 --- a/src/llamafactory/data/parser.py +++ b/src/llamafactory/data/parser.py @@ -20,7 +20,7 @@ from typing import Any, Dict, List, Literal, Optional, Sequence from transformers.utils import cached_file from ..extras.constants import DATA_CONFIG -from ..extras.misc import use_modelscope +from ..extras.misc import use_openmind, use_modelscope @dataclass @@ -97,8 +97,13 @@ def get_dataset_list(dataset_names: Optional[Sequence[str]], dataset_dir: str) - dataset_list: List["DatasetAttr"] = [] for name in dataset_names: - if dataset_info is None: # dataset_dir is ONLINE - load_from = "ms_hub" if use_modelscope() else "hf_hub" + if dataset_info is None: # dataset_dir is ONLINE + if use_openmind(): + load_from = "om_hub" + if use_modelscope(): + load_from = "ms_hub" + if load_from is None: + load_from = "hf_hub" dataset_attr = DatasetAttr(load_from, dataset_name=name) dataset_list.append(dataset_attr) continue @@ -106,11 +111,14 @@ def get_dataset_list(dataset_names: Optional[Sequence[str]], dataset_dir: str) 
- if name not in dataset_info: raise ValueError("Undefined dataset {} in {}.".format(name, DATA_CONFIG)) + has_om_url = "om_hub_url" in dataset_info[name] has_hf_url = "hf_hub_url" in dataset_info[name] has_ms_url = "ms_hub_url" in dataset_info[name] - if has_hf_url or has_ms_url: - if (use_modelscope() and has_ms_url) or (not has_hf_url): + if has_om_url or has_hf_url or has_ms_url: + if (use_openmind() and has_om_url) or (not has_hf_url): + dataset_attr = DatasetAttr("om_hub", dataset_name=dataset_info[name]["om_hub_url"]) + elif (use_modelscope() and has_ms_url) or (not has_hf_url): dataset_attr = DatasetAttr("ms_hub", dataset_name=dataset_info[name]["ms_hub_url"]) else: dataset_attr = DatasetAttr("hf_hub", dataset_name=dataset_info[name]["hf_hub_url"]) diff --git a/src/llamafactory/extras/misc.py b/src/llamafactory/extras/misc.py index 7d0a457a..4c42900d 100644 --- a/src/llamafactory/extras/misc.py +++ b/src/llamafactory/extras/misc.py @@ -231,18 +231,31 @@ def torch_gc() -> None: torch.cuda.empty_cache() -def try_download_model_from_ms(model_args: "ModelArguments") -> str: - if not use_modelscope() or os.path.exists(model_args.model_name_or_path): +def try_download_model_from_other_hub(model_args: "ModelArguments") -> str: + if (not use_openmind() and not use_modelscope()) or os.path.exists(model_args.model_name_or_path): return model_args.model_name_or_path - try: - from modelscope import snapshot_download + if use_openmind(): + try: + import openmind + from openmind.utils.hub import snapshot_download - revision = "master" if model_args.model_revision == "main" else model_args.model_revision - return snapshot_download(model_args.model_name_or_path, revision=revision, cache_dir=model_args.cache_dir) - except ImportError: - raise ImportError("Please install modelscope via `pip install modelscope -U`") + revision = "main" if model_args.model_revision == "main" else model_args.model_revision + return snapshot_download(model_args.model_name_or_path, revision=revision, cache_dir=model_args.cache_dir) + except ImportError: + raise ImportError("Please install openmind and openmind_hub via `pip install openmind -U`") + if use_modelscope(): + try: + from modelscope import snapshot_download + + revision = "master" if model_args.model_revision == "main" else model_args.model_revision + return snapshot_download(model_args.model_name_or_path, revision=revision, cache_dir=model_args.cache_dir) + except ImportError: + raise ImportError("Please install modelscope via `pip install modelscope -U`") + +def use_openmind() -> bool: + return os.environ.get("USE_OPENMIND_HUB", "0").lower() in ["true", "1"] def use_modelscope() -> bool: return os.environ.get("USE_MODELSCOPE_HUB", "0").lower() in ["true", "1"] diff --git a/src/llamafactory/model/loader.py b/src/llamafactory/model/loader.py index fb71498b..7613c092 100644 --- a/src/llamafactory/model/loader.py +++ b/src/llamafactory/model/loader.py @@ -19,7 +19,7 @@ from transformers import AutoConfig, AutoModelForCausalLM, AutoModelForVision2Se from trl import AutoModelForCausalLMWithValueHead from ..extras.logging import get_logger -from ..extras.misc import count_parameters, skip_check_imports, try_download_model_from_ms +from ..extras.misc import count_parameters, skip_check_imports, try_download_model_from_other_hub from .adapter import init_adapter from .model_utils.liger_kernel import apply_liger_kernel from .model_utils.misc import register_autoclass @@ -50,7 +50,7 @@ def _get_init_kwargs(model_args: "ModelArguments") -> Dict[str, Any]: Note: including 
inplace operation of model_args. """ skip_check_imports() - model_args.model_name_or_path = try_download_model_from_ms(model_args) + model_args.model_name_or_path = try_download_model_from_other_hub(model_args) return { "trust_remote_code": True, "cache_dir": model_args.cache_dir, From 132c1f1b0fbd39120e8175540e2654a441a68b14 Mon Sep 17 00:00:00 2001 From: huniu20 Date: Thu, 10 Oct 2024 16:46:34 +0800 Subject: [PATCH 2/8] 1. add model and dataset info to support webui Former-commit-id: 0f669f221a31622ec7a53d0baab5da6a7891f9b6 --- README.md | 2 +- README_zh.md | 3 +-- data/dataset_info.json | 6 ++++-- src/llamafactory/data/loader.py | 2 +- src/llamafactory/data/parser.py | 1 + src/llamafactory/extras/constants.py | 12 ++++++++++++ src/llamafactory/webui/common.py | 9 ++++++++- 7 files changed, 28 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 40253b16..621a1c21 100644 --- a/README.md +++ b/README.md @@ -417,7 +417,7 @@ Download the pre-built Docker images: [32GB](http://mirrors.cn-central-221.ovaij ### Data Preparation -Please refer to [data/README.md](data/README.md) for checking the details about the format of dataset files. You can use datasets on HuggingFace hub, ModelScope hub, modelers hub or load the dataset in local disk. +Please refer to [data/README.md](data/README.md) for checking the details about the format of dataset files. You can either use datasets on HuggingFace / ModelScope / Modelers hub or load the dataset in local disk. > [!NOTE] > Please update `data/dataset_info.json` to use your custom dataset. diff --git a/README_zh.md b/README_zh.md index 61aa860c..1d18ad16 100644 --- a/README_zh.md +++ b/README_zh.md @@ -417,8 +417,7 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh ### 数据准备 -关于数据集文件的格式,请参考 [data/README_zh.md](data/README_zh.md) 的内容。你可以使用 HuggingFace , ModelScope 或者 Modelers 上的数据集或加载本地数据集。 - +关于数据集文件的格式,请参考 [data/README_zh.md](data/README_zh.md) 的内容。你可以使用 HuggingFace / ModelScope / Modelers 上的数据集或加载本地数据集。 > [!NOTE] > 使用自定义数据集时,请更新 `data/dataset_info.json` 文件。 diff --git a/data/dataset_info.json b/data/dataset_info.json index f0ee130c..06224a32 100644 --- a/data/dataset_info.json +++ b/data/dataset_info.json @@ -54,7 +54,8 @@ }, "alpaca_en": { "hf_hub_url": "llamafactory/alpaca_en", - "ms_hub_url": "llamafactory/alpaca_en" + "ms_hub_url": "llamafactory/alpaca_en", + "om_hub_url": "HaM/alpaca_en" }, "alpaca_zh": { "hf_hub_url": "llamafactory/alpaca_zh", @@ -66,7 +67,8 @@ }, "alpaca_gpt4_zh": { "hf_hub_url": "llamafactory/alpaca_gpt4_zh", - "ms_hub_url": "llamafactory/alpaca_gpt4_zh" + "ms_hub_url": "llamafactory/alpaca_gpt4_zh", + "om_hub_url": "State_Cloud/alpaca-gpt4-data-zh" }, "glaive_toolcall_en": { "hf_hub_url": "llamafactory/glaive_toolcall_en", diff --git a/src/llamafactory/data/loader.py b/src/llamafactory/data/loader.py index 03a38fa8..0849b603 100644 --- a/src/llamafactory/data/loader.py +++ b/src/llamafactory/data/loader.py @@ -94,7 +94,7 @@ def _load_single_dataset( name=data_name, data_dir=data_dir, data_files=data_files, - split=data_args.split, + split=dataset_attr.split, cache_dir=cache_dir, token=model_args.ms_hub_token, streaming=(data_args.streaming and (dataset_attr.load_from != "file")), diff --git a/src/llamafactory/data/parser.py b/src/llamafactory/data/parser.py index 696cd488..879264bb 100644 --- a/src/llamafactory/data/parser.py +++ b/src/llamafactory/data/parser.py @@ -98,6 +98,7 @@ def get_dataset_list(dataset_names: Optional[Sequence[str]], dataset_dir: str) - dataset_list: List["DatasetAttr"] = [] for name 
in dataset_names: if dataset_info is None: # dataset_dir is ONLINE + load_from = None if use_openmind(): load_from = "om_hub" if use_modelscope(): diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 7ed20c2b..bf2386f4 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -107,6 +107,7 @@ VISION_MODELS = set() class DownloadSource(str, Enum): DEFAULT = "hf" MODELSCOPE = "ms" + MODELERS = "om" def register_model_group( @@ -163,14 +164,17 @@ register_model_group( "Baichuan2-13B-Base": { DownloadSource.DEFAULT: "baichuan-inc/Baichuan2-13B-Base", DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-13B-Base", + DownloadSource.MODELERS: "Baichuan/Baichuan2_13b_base_pt" }, "Baichuan2-7B-Chat": { DownloadSource.DEFAULT: "baichuan-inc/Baichuan2-7B-Chat", DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-7B-Chat", + DownloadSource.MODELERS: "Baichuan/Baichuan2_7b_chat_pt" }, "Baichuan2-13B-Chat": { DownloadSource.DEFAULT: "baichuan-inc/Baichuan2-13B-Chat", DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-13B-Chat", + DownloadSource.MODELERS: "Baichuan/Baichuan2_13b_chat_pt" }, }, template="baichuan2", @@ -559,6 +563,7 @@ register_model_group( "Gemma-2-9B-Instruct": { DownloadSource.DEFAULT: "google/gemma-2-9b-it", DownloadSource.MODELSCOPE: "LLM-Research/gemma-2-9b-it", + DownloadSource.MODELERS: "LlamaFactory/Qwen2-VL-2B-Instruct" }, "Gemma-2-27B-Instruct": { DownloadSource.DEFAULT: "google/gemma-2-27b-it", @@ -656,6 +661,7 @@ register_model_group( "InternLM2.5-20B-Chat": { DownloadSource.DEFAULT: "internlm/internlm2_5-20b-chat", DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm2_5-20b-chat", + DownloadSource.MODELERS: "Intern/internlm2_5-20b-chat" }, }, template="intern2", @@ -756,6 +762,7 @@ register_model_group( "Llama-3-8B-Chinese-Chat": { DownloadSource.DEFAULT: "shenzhi-wang/Llama3-8B-Chinese-Chat", DownloadSource.MODELSCOPE: "LLM-Research/Llama3-8B-Chinese-Chat", + DownloadSource.MODELERS: "HaM/Llama3-8B-Chinese-Chat", }, "Llama-3-70B-Chinese-Chat": { DownloadSource.DEFAULT: "shenzhi-wang/Llama3-70B-Chinese-Chat", @@ -960,6 +967,7 @@ register_model_group( "MiniCPM3-4B-Chat": { DownloadSource.DEFAULT: "openbmb/MiniCPM3-4B", DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM3-4B", + DownloadSource.MODELERS: "LlamaFactory/MiniCPM3-4B" }, }, template="cpm3", @@ -1699,6 +1707,7 @@ register_model_group( "Qwen2-VL-2B-Instruct": { DownloadSource.DEFAULT: "Qwen/Qwen2-VL-2B-Instruct", DownloadSource.MODELSCOPE: "qwen/Qwen2-VL-2B-Instruct", + DownloadSource.MODELERS: "LlamaFactory/Qwen2-VL-2B-Instruct" }, "Qwen2-VL-7B-Instruct": { DownloadSource.DEFAULT: "Qwen/Qwen2-VL-7B-Instruct", @@ -1801,10 +1810,12 @@ register_model_group( "TeleChat-7B-Chat": { DownloadSource.DEFAULT: "Tele-AI/telechat-7B", DownloadSource.MODELSCOPE: "TeleAI/telechat-7B", + DownloadSource.MODELERS: "TeleAI/TeleChat-7B-pt" }, "TeleChat-12B-Chat": { DownloadSource.DEFAULT: "Tele-AI/TeleChat-12B", DownloadSource.MODELSCOPE: "TeleAI/TeleChat-12B", + DownloadSource.MODELERS: "TeleAI/TeleChat-12B-pt", }, "TeleChat-12B-v2-Chat": { DownloadSource.DEFAULT: "Tele-AI/TeleChat-12B-v2", @@ -2023,6 +2034,7 @@ register_model_group( "Yi-1.5-6B-Chat": { DownloadSource.DEFAULT: "01-ai/Yi-1.5-6B-Chat", DownloadSource.MODELSCOPE: "01ai/Yi-1.5-6B-Chat", + DownloadSource.MODELERS: "LlamaFactory/Yi-1.5-6B-Chat" }, "Yi-1.5-9B-Chat": { DownloadSource.DEFAULT: "01-ai/Yi-1.5-9B-Chat", diff --git a/src/llamafactory/webui/common.py b/src/llamafactory/webui/common.py index 
d4e9be51..a078c976 100644 --- a/src/llamafactory/webui/common.py +++ b/src/llamafactory/webui/common.py @@ -31,7 +31,7 @@ from ..extras.constants import ( DownloadSource, ) from ..extras.logging import get_logger -from ..extras.misc import use_modelscope +from ..extras.misc import use_modelscope, use_openmind from ..extras.packages import is_gradio_available @@ -112,6 +112,13 @@ def get_model_path(model_name: str) -> str: ): # replace path model_path = path_dict.get(DownloadSource.MODELSCOPE) + if ( + use_openmind() + and path_dict.get(DownloadSource.MODELERS) + and model_path == path_dict.get(DownloadSource.DEFAULT) + ): # replace path + model_path = path_dict.get(DownloadSource.MODELERS) + return model_path From e8e98bb125bbbe551ea6622af199cf045bb44ed3 Mon Sep 17 00:00:00 2001 From: huniu20 Date: Thu, 10 Oct 2024 17:16:46 +0800 Subject: [PATCH 3/8] add om_hub_token argument Former-commit-id: 7b91be33c9cd8473453716f0c4c6dec924304efc --- src/llamafactory/data/loader.py | 2 +- src/llamafactory/extras/constants.py | 2 +- src/llamafactory/hparams/model_args.py | 4 ++++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/llamafactory/data/loader.py b/src/llamafactory/data/loader.py index 0849b603..385909fc 100644 --- a/src/llamafactory/data/loader.py +++ b/src/llamafactory/data/loader.py @@ -96,7 +96,7 @@ def _load_single_dataset( data_files=data_files, split=dataset_attr.split, cache_dir=cache_dir, - token=model_args.ms_hub_token, + token=model_args.om_hub_token, streaming=(data_args.streaming and (dataset_attr.load_from != "file")), ) except ImportError: diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index bf2386f4..f0f6d619 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -563,7 +563,7 @@ register_model_group( "Gemma-2-9B-Instruct": { DownloadSource.DEFAULT: "google/gemma-2-9b-it", DownloadSource.MODELSCOPE: "LLM-Research/gemma-2-9b-it", - DownloadSource.MODELERS: "LlamaFactory/Qwen2-VL-2B-Instruct" + DownloadSource.MODELERS: "LlamaFactory/gemma-2-2b-it" }, "Gemma-2-27B-Instruct": { DownloadSource.DEFAULT: "google/gemma-2-27b-it", diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index cd2f1867..9847d707 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -267,6 +267,10 @@ class ModelArguments(QuantizationArguments, ProcessorArguments, ExportArguments, default=None, metadata={"help": "Auth token to log in with ModelScope Hub."}, ) + om_hub_token: Optional[str] = field( + default=None, + metadata={"help": "Auth token to log in with Modelers Hub."}, + ) print_param_status: bool = field( default=False, metadata={"help": "For debugging purposes, print the status of the parameters in the model."}, From 39929bda5ab2d4132a6d28225c82b68b9179120e Mon Sep 17 00:00:00 2001 From: Johnny Date: Fri, 11 Oct 2024 12:29:09 +0200 Subject: [PATCH 4/8] Update setup.py Former-commit-id: cb56673675a9d2ba359dd6d9a561084b9bad7bfd --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index f42cf7d1..4dca42af 100644 --- a/setup.py +++ b/setup.py @@ -54,7 +54,7 @@ extra_require = { "gptq": ["optimum>=1.17.0", "auto-gptq>=0.5.0"], "awq": ["autoawq"], "aqlm": ["aqlm[gpu]>=1.1.0"], - "vllm": ["vllm>=0.4.3,<=0.6.2"], + "vllm": ["vllm>=0.4.3,<=0.6.3"], "galore": ["galore-torch"], "badam": ["badam>=1.2.1"], "adam-mini": ["adam-mini"], From 27be1e21226f5305c04a320dc284fe43076ef66a Mon Sep 
17 00:00:00 2001 From: Johnny Date: Fri, 11 Oct 2024 12:29:33 +0200 Subject: [PATCH 5/8] Update parser.py Former-commit-id: e5849cdcce109e15547edcf9a692e7c13d625e5a --- src/llamafactory/hparams/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index 68a5d2f6..f9c6bfe8 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -123,7 +123,7 @@ def _check_extra_dependencies( require_version("mixture-of-depth>=1.1.6", "To fix: pip install mixture-of-depth>=1.1.6") if model_args.infer_backend == "vllm": - require_version("vllm>=0.4.3,<=0.6.2", "To fix: pip install vllm>=0.4.3,<=0.6.2") + require_version("vllm>=0.4.3,<=0.6.3", "To fix: pip install vllm>=0.4.3,<=0.6.2") if finetuning_args.use_galore: require_version("galore_torch", "To fix: pip install galore_torch") From 5a3280ebee82fb4ca60f352e07bc35fe9f2ae702 Mon Sep 17 00:00:00 2001 From: huniu20 Date: Fri, 11 Oct 2024 19:56:13 +0800 Subject: [PATCH 6/8] bugs fixed Former-commit-id: 843b5d85e98b312e5d41ce62ec10e199011beb8c --- src/llamafactory/data/parser.py | 9 ++++----- src/llamafactory/extras/misc.py | 4 +--- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/llamafactory/data/parser.py b/src/llamafactory/data/parser.py index 879264bb..92cb7cbb 100644 --- a/src/llamafactory/data/parser.py +++ b/src/llamafactory/data/parser.py @@ -98,12 +98,11 @@ def get_dataset_list(dataset_names: Optional[Sequence[str]], dataset_dir: str) - dataset_list: List["DatasetAttr"] = [] for name in dataset_names: if dataset_info is None: # dataset_dir is ONLINE - load_from = None if use_openmind(): load_from = "om_hub" - if use_modelscope(): + elif use_modelscope(): load_from = "ms_hub" - if load_from is None: + else: load_from = "hf_hub" dataset_attr = DatasetAttr(load_from, dataset_name=name) dataset_list.append(dataset_attr) @@ -117,9 +116,9 @@ def get_dataset_list(dataset_names: Optional[Sequence[str]], dataset_dir: str) - has_ms_url = "ms_hub_url" in dataset_info[name] if has_om_url or has_hf_url or has_ms_url: - if (use_openmind() and has_om_url) or (not has_hf_url): + if has_om_url and (use_openmind() or not has_hf_url): dataset_attr = DatasetAttr("om_hub", dataset_name=dataset_info[name]["om_hub_url"]) - elif (use_modelscope() and has_ms_url) or (not has_hf_url): + if has_ms_url and (use_modelscope() or not has_hf_url): dataset_attr = DatasetAttr("ms_hub", dataset_name=dataset_info[name]["ms_hub_url"]) else: dataset_attr = DatasetAttr("hf_hub", dataset_name=dataset_info[name]["hf_hub_url"]) diff --git a/src/llamafactory/extras/misc.py b/src/llamafactory/extras/misc.py index 4c42900d..d4f1b152 100644 --- a/src/llamafactory/extras/misc.py +++ b/src/llamafactory/extras/misc.py @@ -237,11 +237,9 @@ def try_download_model_from_other_hub(model_args: "ModelArguments") -> str: if use_openmind(): try: - import openmind from openmind.utils.hub import snapshot_download - revision = "main" if model_args.model_revision == "main" else model_args.model_revision - return snapshot_download(model_args.model_name_or_path, revision=revision, cache_dir=model_args.cache_dir) + return snapshot_download(model_args.model_name_or_path, revision=model_args.model_revision, cache_dir=model_args.cache_dir) except ImportError: raise ImportError("Please install openmind and openmind_hub via `pip install openmind -U`") From e90a1199dac789c61f20606471e974a440957b29 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Fri, 11 Oct 2024 
23:51:54 +0800 Subject: [PATCH 7/8] tiny fix Former-commit-id: 3af57795dda5d236200bad4aa3f2e29ae8930fe2 --- README.md | 6 ++-- README_zh.md | 7 +++-- docker/docker-cuda/docker-compose.yml | 1 + docker/docker-npu/docker-compose.yml | 1 + docker/docker-rocm/docker-compose.yml | 1 + setup.py | 1 + src/llamafactory/data/loader.py | 38 ++++++++++++------------- src/llamafactory/data/parser.py | 20 +++++++------- src/llamafactory/extras/constants.py | 35 +++++++++++++++-------- src/llamafactory/extras/misc.py | 40 +++++++++++++++------------ src/llamafactory/hparams/parser.py | 2 +- src/llamafactory/webui/common.py | 8 +++--- 12 files changed, 91 insertions(+), 69 deletions(-) diff --git a/README.md b/README.md index 621a1c21..1705fef1 100644 --- a/README.md +++ b/README.md @@ -75,7 +75,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ ## Changelog -[24/10/09] We supported downloading pre-trained models and datasets from the **[Modelers Hub](https://modelers.cn/models)** for Chinese mainland users. See [this tutorial](#download-from-modelers-hub) for usage. +[24/10/09] We supported downloading pre-trained models and datasets from the **[Modelers Hub](https://modelers.cn/models)**. See [this tutorial](#download-from-modelers-hub) for usage. [24/09/19] We support fine-tuning the **[Qwen2.5](https://qwenlm.github.io/blog/qwen2.5/)** models. @@ -135,7 +135,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ [23/12/12] We supported fine-tuning the latest MoE model **[Mixtral 8x7B](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1)** in our framework. See hardware requirement [here](#hardware-requirement). -[23/12/01] We supported downloading pre-trained models and datasets from the **[ModelScope Hub](https://modelscope.cn/models)** for Chinese mainland users. See [this tutorial](#download-from-modelscope-hub) for usage. +[23/12/01] We supported downloading pre-trained models and datasets from the **[ModelScope Hub](https://modelscope.cn/models)**. See [this tutorial](#download-from-modelscope-hub) for usage. [23/10/21] We supported **[NEFTune](https://arxiv.org/abs/2310.05914)** trick for fine-tuning. Try `neftune_noise_alpha: 5` argument to activate NEFTune. @@ -365,7 +365,7 @@ cd LLaMA-Factory pip install -e ".[torch,metrics]" ``` -Extra dependencies available: torch, torch-npu, metrics, deepspeed, liger-kernel, bitsandbytes, hqq, eetq, gptq, awq, aqlm, vllm, galore, badam, adam-mini, qwen, modelscope, quality, openmind +Extra dependencies available: torch, torch-npu, metrics, deepspeed, liger-kernel, bitsandbytes, hqq, eetq, gptq, awq, aqlm, vllm, galore, badam, adam-mini, qwen, modelscope, openmind, quality > [!TIP] > Use `pip install --no-deps -e .` to resolve package conflicts. 
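The `USE_OPENMIND_HUB` switch documented in the README hunks above is read through a pair of environment-variable predicates. Below is a minimal, standard-library-only sketch: the two functions mirror `use_modelscope()` and `use_openmind()` from `src/llamafactory/extras/misc.py` as they stand after this patch, while the last two lines are only an illustrative stand-in for running `export USE_OPENMIND_HUB=1` before launching training.

```python
import os


def use_modelscope() -> bool:
    # "1" or "true" (case-insensitive) enables the ModelScope hub; default is off.
    return os.environ.get("USE_MODELSCOPE_HUB", "0").lower() in ["true", "1"]


def use_openmind() -> bool:
    # The new Modelers (openMind) toggle follows the same convention.
    return os.environ.get("USE_OPENMIND_HUB", "0").lower() in ["true", "1"]


# Illustrative equivalent of `export USE_OPENMIND_HUB=1`:
os.environ["USE_OPENMIND_HUB"] = "1"
assert use_openmind()
```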
diff --git a/README_zh.md b/README_zh.md index 1d18ad16..b1810b59 100644 --- a/README_zh.md +++ b/README_zh.md @@ -75,6 +75,7 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272 ## 更新日志 + [24/10/09] 我们支持了从 **[魔乐社区](https://modelers.cn/models)** 下载预训练模型和数据集。详细用法请参照 [此教程](#从魔乐社区下载)。 [24/09/19] 我们支持了 **[Qwen2.5](https://qwenlm.github.io/blog/qwen2.5/)** 模型的微调。 @@ -365,7 +366,7 @@ cd LLaMA-Factory pip install -e ".[torch,metrics]" ``` -可选的额外依赖项:torch、torch-npu、metrics、deepspeed、liger-kernel、bitsandbytes、hqq、eetq、gptq、awq、aqlm、vllm、galore、badam、adam-mini、qwen、modelscope、quality、openmind +可选的额外依赖项:torch、torch-npu、metrics、deepspeed、liger-kernel、bitsandbytes、hqq、eetq、gptq、awq、aqlm、vllm、galore、badam、adam-mini、qwen、modelscope、openmind、quality > [!TIP] > 遇到包冲突时,可使用 `pip install --no-deps -e .` 解决。 @@ -418,6 +419,7 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh ### 数据准备 关于数据集文件的格式,请参考 [data/README_zh.md](data/README_zh.md) 的内容。你可以使用 HuggingFace / ModelScope / Modelers 上的数据集或加载本地数据集。 + > [!NOTE] > 使用自定义数据集时,请更新 `data/dataset_info.json` 文件。 @@ -591,7 +593,7 @@ export USE_MODELSCOPE_HUB=1 # Windows 使用 `set USE_MODELSCOPE_HUB=1` ### 从魔乐社区下载 -您也可以通过下述方法使用魔乐社区,在魔乐社区上下载数据集和模型。 +您也可以通过下述方法,使用魔乐社区下载数据集和模型。 ```bash export USE_OPENMIND_HUB=1 # Windows 使用 `set USE_OPENMIND_HUB=1` @@ -599,7 +601,6 @@ export USE_OPENMIND_HUB=1 # Windows 使用 `set USE_OPENMIND_HUB=1` 将 `model_name_or_path` 设置为模型 ID 来加载对应的模型。在[魔乐社区](https://modelers.cn/models)查看所有可用的模型,例如 `TeleAI/TeleChat-7B-pt`。 - ### 使用 W&B 面板 若要使用 [Weights & Biases](https://wandb.ai) 记录实验数据,请在 yaml 文件中添加下面的参数。 diff --git a/docker/docker-cuda/docker-compose.yml b/docker/docker-cuda/docker-compose.yml index 7af06b74..82e05b36 100644 --- a/docker/docker-cuda/docker-compose.yml +++ b/docker/docker-cuda/docker-compose.yml @@ -16,6 +16,7 @@ services: volumes: - ../../hf_cache:/root/.cache/huggingface - ../../ms_cache:/root/.cache/modelscope + - ../../om_cache:/root/.cache/openmind - ../../data:/app/data - ../../output:/app/output ports: diff --git a/docker/docker-npu/docker-compose.yml b/docker/docker-npu/docker-compose.yml index 657cba9f..333f63ac 100644 --- a/docker/docker-npu/docker-compose.yml +++ b/docker/docker-npu/docker-compose.yml @@ -10,6 +10,7 @@ services: volumes: - ../../hf_cache:/root/.cache/huggingface - ../../ms_cache:/root/.cache/modelscope + - ../../om_cache:/root/.cache/openmind - ../../data:/app/data - ../../output:/app/output - /usr/local/dcmi:/usr/local/dcmi diff --git a/docker/docker-rocm/docker-compose.yml b/docker/docker-rocm/docker-compose.yml index 2a4ea960..c5b5d748 100644 --- a/docker/docker-rocm/docker-compose.yml +++ b/docker/docker-rocm/docker-compose.yml @@ -15,6 +15,7 @@ services: volumes: - ../../hf_cache:/root/.cache/huggingface - ../../ms_cache:/root/.cache/modelscope + - ../../om_cache:/root/.cache/openmind - ../../data:/app/data - ../../output:/app/output - ../../saves:/app/saves diff --git a/setup.py b/setup.py index 4dca42af..90f6362d 100644 --- a/setup.py +++ b/setup.py @@ -60,6 +60,7 @@ extra_require = { "adam-mini": ["adam-mini"], "qwen": ["transformers_stream_generator"], "modelscope": ["modelscope"], + "openmind": ["openmind"], "dev": ["ruff", "pytest"], } diff --git a/src/llamafactory/data/loader.py b/src/llamafactory/data/loader.py index 385909fc..95398e26 100644 --- a/src/llamafactory/data/loader.py +++ b/src/llamafactory/data/loader.py @@ -53,7 +53,7 @@ def _load_single_dataset( """ logger.info("Loading dataset {}...".format(dataset_attr)) data_path, data_name, data_dir, data_files = 
None, None, None, None - if dataset_attr.load_from in ["om_hub", "hf_hub", "ms_hub"]: + if dataset_attr.load_from in ["hf_hub", "ms_hub", "om_hub"]: data_path = dataset_attr.dataset_name data_name = dataset_attr.subset data_dir = dataset_attr.folder @@ -84,24 +84,7 @@ def _load_single_dataset( else: raise NotImplementedError("Unknown load type: {}.".format(dataset_attr.load_from)) - if dataset_attr.load_from == "om_hub": - try: - from openmind import OmDataset - from openmind.utils.hub import OM_DATASETS_CACHE - cache_dir = model_args.cache_dir or OM_DATASETS_CACHE - dataset = OmDataset.load_dataset( - path=data_path, - name=data_name, - data_dir=data_dir, - data_files=data_files, - split=dataset_attr.split, - cache_dir=cache_dir, - token=model_args.om_hub_token, - streaming=(data_args.streaming and (dataset_attr.load_from != "file")), - ) - except ImportError: - raise ImportError("Please install openmind via `pip install openmind -U`") - elif dataset_attr.load_from == "ms_hub": + if dataset_attr.load_from == "ms_hub": require_version("modelscope>=1.11.0", "To fix: pip install modelscope>=1.11.0") from modelscope import MsDataset from modelscope.utils.config_ds import MS_DATASETS_CACHE @@ -119,6 +102,23 @@ def _load_single_dataset( ) if isinstance(dataset, MsDataset): dataset = dataset.to_hf_dataset() + + elif dataset_attr.load_from == "om_hub": + require_version("openmind>=0.8.0", "To fix: pip install openmind>=0.8.0") + from openmind import OmDataset + from openmind.utils.hub import OM_DATASETS_CACHE + + cache_dir = model_args.cache_dir or OM_DATASETS_CACHE + dataset = OmDataset.load_dataset( + path=data_path, + name=data_name, + data_dir=data_dir, + data_files=data_files, + split=dataset_attr.split, + cache_dir=cache_dir, + token=model_args.om_hub_token, + streaming=(data_args.streaming and (dataset_attr.load_from != "file")), + ) else: dataset = load_dataset( path=data_path, diff --git a/src/llamafactory/data/parser.py b/src/llamafactory/data/parser.py index 92cb7cbb..b9060425 100644 --- a/src/llamafactory/data/parser.py +++ b/src/llamafactory/data/parser.py @@ -20,7 +20,7 @@ from typing import Any, Dict, List, Literal, Optional, Sequence from transformers.utils import cached_file from ..extras.constants import DATA_CONFIG -from ..extras.misc import use_openmind, use_modelscope +from ..extras.misc import use_modelscope, use_openmind @dataclass @@ -30,7 +30,7 @@ class DatasetAttr: """ # basic configs - load_from: Literal["hf_hub", "ms_hub", "script", "file"] + load_from: Literal["hf_hub", "ms_hub", "om_hub", "script", "file"] dataset_name: str formatting: Literal["alpaca", "sharegpt"] = "alpaca" ranking: bool = False @@ -97,11 +97,11 @@ def get_dataset_list(dataset_names: Optional[Sequence[str]], dataset_dir: str) - dataset_list: List["DatasetAttr"] = [] for name in dataset_names: - if dataset_info is None: # dataset_dir is ONLINE - if use_openmind(): - load_from = "om_hub" - elif use_modelscope(): + if dataset_info is None: # dataset_dir is ONLINE + if use_modelscope(): load_from = "ms_hub" + elif use_openmind(): + load_from = "om_hub" else: load_from = "hf_hub" dataset_attr = DatasetAttr(load_from, dataset_name=name) @@ -111,15 +111,15 @@ def get_dataset_list(dataset_names: Optional[Sequence[str]], dataset_dir: str) - if name not in dataset_info: raise ValueError("Undefined dataset {} in {}.".format(name, DATA_CONFIG)) - has_om_url = "om_hub_url" in dataset_info[name] has_hf_url = "hf_hub_url" in dataset_info[name] has_ms_url = "ms_hub_url" in dataset_info[name] + has_om_url = 
"om_hub_url" in dataset_info[name] - if has_om_url or has_hf_url or has_ms_url: - if has_om_url and (use_openmind() or not has_hf_url): - dataset_attr = DatasetAttr("om_hub", dataset_name=dataset_info[name]["om_hub_url"]) + if has_hf_url or has_ms_url or has_om_url: if has_ms_url and (use_modelscope() or not has_hf_url): dataset_attr = DatasetAttr("ms_hub", dataset_name=dataset_info[name]["ms_hub_url"]) + elif has_om_url and (use_openmind() or not has_hf_url): + dataset_attr = DatasetAttr("om_hub", dataset_name=dataset_info[name]["om_hub_url"]) else: dataset_attr = DatasetAttr("hf_hub", dataset_name=dataset_info[name]["hf_hub_url"]) elif "script_url" in dataset_info[name]: diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index f0f6d619..c6ddd61b 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -107,7 +107,7 @@ VISION_MODELS = set() class DownloadSource(str, Enum): DEFAULT = "hf" MODELSCOPE = "ms" - MODELERS = "om" + OPENMIND = "om" def register_model_group( @@ -164,17 +164,17 @@ register_model_group( "Baichuan2-13B-Base": { DownloadSource.DEFAULT: "baichuan-inc/Baichuan2-13B-Base", DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-13B-Base", - DownloadSource.MODELERS: "Baichuan/Baichuan2_13b_base_pt" + DownloadSource.OPENMIND: "Baichuan/Baichuan2_13b_base_pt", }, "Baichuan2-7B-Chat": { DownloadSource.DEFAULT: "baichuan-inc/Baichuan2-7B-Chat", DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-7B-Chat", - DownloadSource.MODELERS: "Baichuan/Baichuan2_7b_chat_pt" + DownloadSource.OPENMIND: "Baichuan/Baichuan2_7b_chat_pt", }, "Baichuan2-13B-Chat": { DownloadSource.DEFAULT: "baichuan-inc/Baichuan2-13B-Chat", DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-13B-Chat", - DownloadSource.MODELERS: "Baichuan/Baichuan2_13b_chat_pt" + DownloadSource.OPENMIND: "Baichuan/Baichuan2_13b_chat_pt", }, }, template="baichuan2", @@ -559,11 +559,12 @@ register_model_group( "Gemma-2-2B-Instruct": { DownloadSource.DEFAULT: "google/gemma-2-2b-it", DownloadSource.MODELSCOPE: "LLM-Research/gemma-2-2b-it", + DownloadSource.OPENMIND: "LlamaFactory/gemma-2-2b-it", }, "Gemma-2-9B-Instruct": { DownloadSource.DEFAULT: "google/gemma-2-9b-it", DownloadSource.MODELSCOPE: "LLM-Research/gemma-2-9b-it", - DownloadSource.MODELERS: "LlamaFactory/gemma-2-2b-it" + DownloadSource.OPENMIND: "LlamaFactory/gemma-2-9b-it", }, "Gemma-2-27B-Instruct": { DownloadSource.DEFAULT: "google/gemma-2-27b-it", @@ -583,6 +584,7 @@ register_model_group( "GLM-4-9B-Chat": { DownloadSource.DEFAULT: "THUDM/glm-4-9b-chat", DownloadSource.MODELSCOPE: "ZhipuAI/glm-4-9b-chat", + DownloadSource.OPENMIND: "LlamaFactory/glm-4-9b-chat", }, "GLM-4-9B-1M-Chat": { DownloadSource.DEFAULT: "THUDM/glm-4-9b-chat-1m", @@ -637,6 +639,7 @@ register_model_group( "InternLM2.5-1.8B": { DownloadSource.DEFAULT: "internlm/internlm2_5-1_8b", DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm2_5-1_8b", + DownloadSource.OPENMIND: "Intern/internlm2_5-1_8b", }, "InternLM2.5-7B": { DownloadSource.DEFAULT: "internlm/internlm2_5-7b", @@ -645,23 +648,27 @@ register_model_group( "InternLM2.5-20B": { DownloadSource.DEFAULT: "internlm/internlm2_5-20b", DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm2_5-20b", + DownloadSource.OPENMIND: "Intern/internlm2_5-20b", }, "InternLM2.5-1.8B-Chat": { DownloadSource.DEFAULT: "internlm/internlm2_5-1_8b-chat", DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm2_5-1_8b-chat", + DownloadSource.OPENMIND: "Intern/internlm2_5-1_8b-chat", 
}, "InternLM2.5-7B-Chat": { DownloadSource.DEFAULT: "internlm/internlm2_5-7b-chat", DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm2_5-7b-chat", + DownloadSource.OPENMIND: "Intern/internlm2_5-7b-chat", }, "InternLM2.5-7B-1M-Chat": { DownloadSource.DEFAULT: "internlm/internlm2_5-7b-chat-1m", DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm2_5-7b-chat-1m", + DownloadSource.OPENMIND: "Intern/internlm2_5-7b-chat-1m", }, "InternLM2.5-20B-Chat": { DownloadSource.DEFAULT: "internlm/internlm2_5-20b-chat", DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm2_5-20b-chat", - DownloadSource.MODELERS: "Intern/internlm2_5-20b-chat" + DownloadSource.OPENMIND: "Intern/internlm2_5-20b-chat", }, }, template="intern2", @@ -762,7 +769,7 @@ register_model_group( "Llama-3-8B-Chinese-Chat": { DownloadSource.DEFAULT: "shenzhi-wang/Llama3-8B-Chinese-Chat", DownloadSource.MODELSCOPE: "LLM-Research/Llama3-8B-Chinese-Chat", - DownloadSource.MODELERS: "HaM/Llama3-8B-Chinese-Chat", + DownloadSource.OPENMIND: "LlamaFactory/Llama3-Chinese-8B-Instruct", }, "Llama-3-70B-Chinese-Chat": { DownloadSource.DEFAULT: "shenzhi-wang/Llama3-70B-Chinese-Chat", @@ -967,7 +974,7 @@ register_model_group( "MiniCPM3-4B-Chat": { DownloadSource.DEFAULT: "openbmb/MiniCPM3-4B", DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM3-4B", - DownloadSource.MODELERS: "LlamaFactory/MiniCPM3-4B" + DownloadSource.OPENMIND: "LlamaFactory/MiniCPM3-4B", }, }, template="cpm3", @@ -1417,14 +1424,17 @@ register_model_group( "Qwen2-0.5B-Instruct": { DownloadSource.DEFAULT: "Qwen/Qwen2-0.5B-Instruct", DownloadSource.MODELSCOPE: "qwen/Qwen2-0.5B-Instruct", + DownloadSource.OPENMIND: "LlamaFactory/Qwen2-0.5B-Instruct", }, "Qwen2-1.5B-Instruct": { DownloadSource.DEFAULT: "Qwen/Qwen2-1.5B-Instruct", DownloadSource.MODELSCOPE: "qwen/Qwen2-1.5B-Instruct", + DownloadSource.OPENMIND: "LlamaFactory/Qwen2-1.5B-Instruct", }, "Qwen2-7B-Instruct": { DownloadSource.DEFAULT: "Qwen/Qwen2-7B-Instruct", DownloadSource.MODELSCOPE: "qwen/Qwen2-7B-Instruct", + DownloadSource.OPENMIND: "LlamaFactory/Qwen2-7B-Instruct", }, "Qwen2-72B-Instruct": { DownloadSource.DEFAULT: "Qwen/Qwen2-72B-Instruct", @@ -1707,11 +1717,12 @@ register_model_group( "Qwen2-VL-2B-Instruct": { DownloadSource.DEFAULT: "Qwen/Qwen2-VL-2B-Instruct", DownloadSource.MODELSCOPE: "qwen/Qwen2-VL-2B-Instruct", - DownloadSource.MODELERS: "LlamaFactory/Qwen2-VL-2B-Instruct" + DownloadSource.OPENMIND: "LlamaFactory/Qwen2-VL-2B-Instruct", }, "Qwen2-VL-7B-Instruct": { DownloadSource.DEFAULT: "Qwen/Qwen2-VL-7B-Instruct", DownloadSource.MODELSCOPE: "qwen/Qwen2-VL-7B-Instruct", + DownloadSource.OPENMIND: "LlamaFactory/Qwen2-VL-7B-Instruct", }, "Qwen2-VL-72B-Instruct": { DownloadSource.DEFAULT: "Qwen/Qwen2-VL-72B-Instruct", @@ -1810,12 +1821,12 @@ register_model_group( "TeleChat-7B-Chat": { DownloadSource.DEFAULT: "Tele-AI/telechat-7B", DownloadSource.MODELSCOPE: "TeleAI/telechat-7B", - DownloadSource.MODELERS: "TeleAI/TeleChat-7B-pt" + DownloadSource.OPENMIND: "TeleAI/TeleChat-7B-pt", }, "TeleChat-12B-Chat": { DownloadSource.DEFAULT: "Tele-AI/TeleChat-12B", DownloadSource.MODELSCOPE: "TeleAI/TeleChat-12B", - DownloadSource.MODELERS: "TeleAI/TeleChat-12B-pt", + DownloadSource.OPENMIND: "TeleAI/TeleChat-12B-pt", }, "TeleChat-12B-v2-Chat": { DownloadSource.DEFAULT: "Tele-AI/TeleChat-12B-v2", @@ -2034,7 +2045,7 @@ register_model_group( "Yi-1.5-6B-Chat": { DownloadSource.DEFAULT: "01-ai/Yi-1.5-6B-Chat", DownloadSource.MODELSCOPE: "01ai/Yi-1.5-6B-Chat", - DownloadSource.MODELERS: "LlamaFactory/Yi-1.5-6B-Chat" 
+ DownloadSource.OPENMIND: "LlamaFactory/Yi-1.5-6B-Chat", }, "Yi-1.5-9B-Chat": { DownloadSource.DEFAULT: "01-ai/Yi-1.5-9B-Chat", diff --git a/src/llamafactory/extras/misc.py b/src/llamafactory/extras/misc.py index d4f1b152..fd78530a 100644 --- a/src/llamafactory/extras/misc.py +++ b/src/llamafactory/extras/misc.py @@ -232,28 +232,34 @@ def torch_gc() -> None: def try_download_model_from_other_hub(model_args: "ModelArguments") -> str: - if (not use_openmind() and not use_modelscope()) or os.path.exists(model_args.model_name_or_path): + if (not use_modelscope() and not use_openmind()) or os.path.exists(model_args.model_name_or_path): return model_args.model_name_or_path - if use_openmind(): - try: - from openmind.utils.hub import snapshot_download - - return snapshot_download(model_args.model_name_or_path, revision=model_args.model_revision, cache_dir=model_args.cache_dir) - except ImportError: - raise ImportError("Please install openmind and openmind_hub via `pip install openmind -U`") - if use_modelscope(): - try: - from modelscope import snapshot_download + require_version("modelscope>=1.11.0", "To fix: pip install modelscope>=1.11.0") + from modelscope import snapshot_download - revision = "master" if model_args.model_revision == "main" else model_args.model_revision - return snapshot_download(model_args.model_name_or_path, revision=revision, cache_dir=model_args.cache_dir) - except ImportError: - raise ImportError("Please install modelscope via `pip install modelscope -U`") + revision = "master" if model_args.model_revision == "main" else model_args.model_revision + return snapshot_download( + model_args.model_name_or_path, + revision=revision, + cache_dir=model_args.cache_dir, + ) + + if use_openmind(): + require_version("openmind>=0.8.0", "To fix: pip install openmind>=0.8.0") + from openmind.utils.hub import snapshot_download + + return snapshot_download( + model_args.model_name_or_path, + revision=model_args.model_revision, + cache_dir=model_args.cache_dir, + ) -def use_openmind() -> bool: - return os.environ.get("USE_OPENMIND_HUB", "0").lower() in ["true", "1"] def use_modelscope() -> bool: return os.environ.get("USE_MODELSCOPE_HUB", "0").lower() in ["true", "1"] + + +def use_openmind() -> bool: + return os.environ.get("USE_OPENMIND_HUB", "0").lower() in ["true", "1"] diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index f9c6bfe8..f62468a4 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -123,7 +123,7 @@ def _check_extra_dependencies( require_version("mixture-of-depth>=1.1.6", "To fix: pip install mixture-of-depth>=1.1.6") if model_args.infer_backend == "vllm": - require_version("vllm>=0.4.3,<=0.6.3", "To fix: pip install vllm>=0.4.3,<=0.6.2") + require_version("vllm>=0.4.3,<=0.6.3", "To fix: pip install vllm>=0.4.3,<=0.6.3") if finetuning_args.use_galore: require_version("galore_torch", "To fix: pip install galore_torch") diff --git a/src/llamafactory/webui/common.py b/src/llamafactory/webui/common.py index a078c976..b28626bc 100644 --- a/src/llamafactory/webui/common.py +++ b/src/llamafactory/webui/common.py @@ -109,15 +109,15 @@ def get_model_path(model_name: str) -> str: use_modelscope() and path_dict.get(DownloadSource.MODELSCOPE) and model_path == path_dict.get(DownloadSource.DEFAULT) - ): # replace path + ): # replace hf path with ms path model_path = path_dict.get(DownloadSource.MODELSCOPE) if ( use_openmind() - and path_dict.get(DownloadSource.MODELERS) + and 
path_dict.get(DownloadSource.OPENMIND) and model_path == path_dict.get(DownloadSource.DEFAULT) - ): # replace path - model_path = path_dict.get(DownloadSource.MODELERS) + ): # replace hf path with om path + model_path = path_dict.get(DownloadSource.OPENMIND) return model_path From 92de7261027326b4d86abe6b88d7060782f1b0d6 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 12 Oct 2024 01:24:43 +0800 Subject: [PATCH 8/8] fix #5668 Former-commit-id: 40ceba500bab7452b8671a9fbcd14bbf4a8f6f37 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 69489bec..d26961f7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ datasets>=2.16.0,<=2.21.0 accelerate>=0.30.1,<=0.34.2 peft>=0.11.1,<=0.12.0 trl>=0.8.6,<=0.9.6 -gradio>=4.0.0 +gradio>=4.0.0,<5.0.0 pandas>=2.0.0 scipy einops
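Taken together, the series leaves `get_dataset_list()` with a fixed hub precedence: ModelScope when enabled, then Modelers, then Hugging Face, with a hub-specific URL also winning whenever no `hf_hub_url` exists. The following self-contained sketch replays that final behavior; the helper names `_enabled()` and `resolve_dataset_source()` are illustrative rather than names from the codebase, while the precedence conditions and the `alpaca_en` entry are taken directly from PATCH 7 and PATCH 2.

```python
import os
from typing import Dict, Tuple


def _enabled(var: str) -> bool:
    # Same convention as use_modelscope()/use_openmind() in extras/misc.py.
    return os.environ.get(var, "0").lower() in ["true", "1"]


def resolve_dataset_source(entry: Dict[str, str]) -> Tuple[str, str]:
    """Replay the final hub precedence of get_dataset_list() after PATCH 7:
    ModelScope first when enabled, then Modelers, then Hugging Face."""
    has_hf = "hf_hub_url" in entry
    has_ms = "ms_hub_url" in entry
    has_om = "om_hub_url" in entry
    if has_ms and (_enabled("USE_MODELSCOPE_HUB") or not has_hf):
        return "ms_hub", entry["ms_hub_url"]
    if has_om and (_enabled("USE_OPENMIND_HUB") or not has_hf):
        return "om_hub", entry["om_hub_url"]
    if has_hf:
        return "hf_hub", entry["hf_hub_url"]
    raise ValueError("dataset entry defines no hub URL")


# The alpaca_en entry added in PATCH 2 is reachable from all three hubs:
entry = {
    "hf_hub_url": "llamafactory/alpaca_en",
    "ms_hub_url": "llamafactory/alpaca_en",
    "om_hub_url": "HaM/alpaca_en",
}
os.environ.pop("USE_MODELSCOPE_HUB", None)
os.environ["USE_OPENMIND_HUB"] = "1"
print(resolve_dataset_source(entry))  # -> ('om_hub', 'HaM/alpaca_en')
```

Note that a dataset defining only `om_hub_url` resolves to `om_hub` even without the environment toggle, which matches the `or not has_hf_url` arm of the patched condition.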