Mirror of https://github.com/hiyouga/LLaMA-Factory.git (synced 2025-08-23 22:32:54 +08:00)

Merge branch 'hiyouga:main' into pixtral-patch

Former-commit-id: 95330893c5cd290430a0a2a4e4afa87afab2eb88
Commit: 8ea1c5c69e
README.md (22 changed lines)
@@ -75,6 +75,8 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 
 ## Changelog
 
+[24/10/09] We supported downloading pre-trained models and datasets from the **[Modelers Hub](https://modelers.cn/models)**. See [this tutorial](#download-from-modelers-hub) for usage.
+
 [24/09/19] We support fine-tuning the **[Qwen2.5](https://qwenlm.github.io/blog/qwen2.5/)** models.
 
 [24/08/30] We support fine-tuning the **[Qwen2-VL](https://qwenlm.github.io/blog/qwen2-vl/)** models. Thank [@simonJJJ](https://github.com/simonJJJ)'s PR.
@@ -133,7 +135,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 
 [23/12/12] We supported fine-tuning the latest MoE model **[Mixtral 8x7B](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1)** in our framework. See hardware requirement [here](#hardware-requirement).
 
-[23/12/01] We supported downloading pre-trained models and datasets from the **[ModelScope Hub](https://modelscope.cn/models)** for Chinese mainland users. See [this tutorial](#download-from-modelscope-hub) for usage.
+[23/12/01] We supported downloading pre-trained models and datasets from the **[ModelScope Hub](https://modelscope.cn/models)**. See [this tutorial](#download-from-modelscope-hub) for usage.
 
 [23/10/21] We supported **[NEFTune](https://arxiv.org/abs/2310.05914)** trick for fine-tuning. Try `neftune_noise_alpha: 5` argument to activate NEFTune.
 
@@ -364,7 +366,7 @@ cd LLaMA-Factory
 pip install -e ".[torch,metrics]"
 ```
 
-Extra dependencies available: torch, torch-npu, metrics, deepspeed, liger-kernel, bitsandbytes, hqq, eetq, gptq, awq, aqlm, vllm, galore, badam, adam-mini, qwen, modelscope, quality
+Extra dependencies available: torch, torch-npu, metrics, deepspeed, liger-kernel, bitsandbytes, hqq, eetq, gptq, awq, aqlm, vllm, galore, badam, adam-mini, qwen, modelscope, openmind, quality
 
 > [!TIP]
 > Use `pip install --no-deps -e .` to resolve package conflicts.
@@ -416,7 +418,7 @@ Download the pre-built Docker images: [32GB](http://mirrors.cn-central-221.ovaij
 
 ### Data Preparation
 
-Please refer to [data/README.md](data/README.md) for checking the details about the format of dataset files. You can either use datasets on HuggingFace / ModelScope hub or load the dataset in local disk.
+Please refer to [data/README.md](data/README.md) for checking the details about the format of dataset files. You can either use datasets on HuggingFace / ModelScope / Modelers hub or load the dataset in local disk.
 
 > [!NOTE]
 > Please update `data/dataset_info.json` to use your custom dataset.
@@ -484,6 +486,7 @@ docker build -f ./docker/docker-cuda/Dockerfile \
 docker run -dit --gpus=all \
     -v ./hf_cache:/root/.cache/huggingface \
     -v ./ms_cache:/root/.cache/modelscope \
+    -v ./om_cache:/root/.cache/openmind \
     -v ./data:/app/data \
     -v ./output:/app/output \
     -p 7860:7860 \
@@ -508,6 +511,7 @@ docker build -f ./docker/docker-npu/Dockerfile \
 docker run -dit \
     -v ./hf_cache:/root/.cache/huggingface \
     -v ./ms_cache:/root/.cache/modelscope \
+    -v ./om_cache:/root/.cache/openmind \
     -v ./data:/app/data \
     -v ./output:/app/output \
     -v /usr/local/dcmi:/usr/local/dcmi \
@@ -541,6 +545,7 @@ docker build -f ./docker/docker-rocm/Dockerfile \
 docker run -dit \
     -v ./hf_cache:/root/.cache/huggingface \
     -v ./ms_cache:/root/.cache/modelscope \
+    -v ./om_cache:/root/.cache/openmind \
     -v ./data:/app/data \
     -v ./output:/app/output \
     -v ./saves:/app/saves \
@@ -561,6 +566,7 @@ docker exec -it llamafactory bash
 
 - `hf_cache`: Utilize Hugging Face cache on the host machine. Reassignable if a cache already exists in a different directory.
 - `ms_cache`: Similar to Hugging Face cache but for ModelScope users.
+- `om_cache`: Similar to Hugging Face cache but for Modelers users.
 - `data`: Place datasets on this dir of the host machine so that they can be selected on LLaMA Board GUI.
 - `output`: Set export dir to this location so that the merged result can be accessed directly on the host machine.
 
@@ -585,6 +591,16 @@ export USE_MODELSCOPE_HUB=1 # `set USE_MODELSCOPE_HUB=1` for Windows
 
 Train the model by specifying a model ID of the ModelScope Hub as the `model_name_or_path`. You can find a full list of model IDs at [ModelScope Hub](https://modelscope.cn/models), e.g., `LLM-Research/Meta-Llama-3-8B-Instruct`.
 
+### Download from Modelers Hub
+
+You can also use Modelers Hub to download models and datasets.
+
+```bash
+export USE_OPENMIND_HUB=1 # `set USE_OPENMIND_HUB=1` for Windows
+```
+
+Train the model by specifying a model ID of the Modelers Hub as the `model_name_or_path`. You can find a full list of model IDs at [Modelers Hub](https://modelers.cn/models), e.g., `TeleAI/TeleChat-7B-pt`.
+
 ### Use W&B Logger
 
 To use [Weights & Biases](https://wandb.ai) for logging experimental results, you need to add the following arguments to yaml files.
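The new "Download from Modelers Hub" section only shows the shell switch. As a minimal Python sketch of the same toggle, using the `use_openmind()` helper this commit adds to `src/llamafactory/extras/misc.py` (assumes LLaMA-Factory from this branch is installed, e.g. via `pip install -e .`):

```python
# Minimal sketch: the in-process equivalent of `export USE_OPENMIND_HUB=1`.
import os

os.environ["USE_OPENMIND_HUB"] = "1"  # must be set before model/dataset loading

from llamafactory.extras.misc import use_openmind

print(use_openmind())  # True -> downloads now prefer the Modelers (openMind) hub
```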
README_zh.md (20 changed lines)
@@ -76,6 +76,8 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
 
 ## 更新日志
 
+[24/10/09] 我们支持了从 **[魔乐社区](https://modelers.cn/models)** 下载预训练模型和数据集。详细用法请参照 [此教程](#从魔乐社区下载)。
+
 [24/09/19] 我们支持了 **[Qwen2.5](https://qwenlm.github.io/blog/qwen2.5/)** 模型的微调。
 
 [24/08/30] 我们支持了 **[Qwen2-VL](https://qwenlm.github.io/blog/qwen2-vl/)** 模型的微调。感谢 [@simonJJJ](https://github.com/simonJJJ) 的 PR。
@@ -365,7 +367,7 @@ cd LLaMA-Factory
 pip install -e ".[torch,metrics]"
 ```
 
-可选的额外依赖项:torch、torch-npu、metrics、deepspeed、liger-kernel、bitsandbytes、hqq、eetq、gptq、awq、aqlm、vllm、galore、badam、adam-mini、qwen、modelscope、quality
+可选的额外依赖项:torch、torch-npu、metrics、deepspeed、liger-kernel、bitsandbytes、hqq、eetq、gptq、awq、aqlm、vllm、galore、badam、adam-mini、qwen、modelscope、openmind、quality
 
 > [!TIP]
 > 遇到包冲突时,可使用 `pip install --no-deps -e .` 解决。
@@ -417,7 +419,7 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh
 
 ### 数据准备
 
-关于数据集文件的格式,请参考 [data/README_zh.md](data/README_zh.md) 的内容。你可以使用 HuggingFace / ModelScope 上的数据集或加载本地数据集。
+关于数据集文件的格式,请参考 [data/README_zh.md](data/README_zh.md) 的内容。你可以使用 HuggingFace / ModelScope / Modelers 上的数据集或加载本地数据集。
 
 > [!NOTE]
 > 使用自定义数据集时,请更新 `data/dataset_info.json` 文件。
@@ -485,6 +487,7 @@ docker build -f ./docker/docker-cuda/Dockerfile \
 docker run -dit --gpus=all \
     -v ./hf_cache:/root/.cache/huggingface \
     -v ./ms_cache:/root/.cache/modelscope \
+    -v ./om_cache:/root/.cache/openmind \
     -v ./data:/app/data \
     -v ./output:/app/output \
     -p 7860:7860 \
@@ -509,6 +512,7 @@ docker build -f ./docker/docker-npu/Dockerfile \
 docker run -dit \
     -v ./hf_cache:/root/.cache/huggingface \
     -v ./ms_cache:/root/.cache/modelscope \
+    -v ./om_cache:/root/.cache/openmind \
     -v ./data:/app/data \
     -v ./output:/app/output \
     -v /usr/local/dcmi:/usr/local/dcmi \
@@ -542,6 +546,7 @@ docker build -f ./docker/docker-rocm/Dockerfile \
 docker run -dit \
     -v ./hf_cache:/root/.cache/huggingface \
     -v ./ms_cache:/root/.cache/modelscope \
+    -v ./om_cache:/root/.cache/openmind \
     -v ./data:/app/data \
     -v ./output:/app/output \
     -v ./saves:/app/saves \
@@ -562,6 +567,7 @@ docker exec -it llamafactory bash
 
 - `hf_cache`:使用宿主机的 Hugging Face 缓存文件夹,允许更改为新的目录。
 - `ms_cache`:类似 Hugging Face 缓存文件夹,为 ModelScope 用户提供。
+- `om_cache`:类似 Hugging Face 缓存文件夹,为 Modelers 用户提供。
 - `data`:宿主机中存放数据集的文件夹路径。
 - `output`:将导出目录设置为该路径后,即可在宿主机中访问导出后的模型。
 
@@ -586,6 +592,16 @@ export USE_MODELSCOPE_HUB=1 # Windows 使用 `set USE_MODELSCOPE_HUB=1`
 
 将 `model_name_or_path` 设置为模型 ID 来加载对应的模型。在[魔搭社区](https://modelscope.cn/models)查看所有可用的模型,例如 `LLM-Research/Meta-Llama-3-8B-Instruct`。
 
+### 从魔乐社区下载
+
+您也可以通过下述方法,使用魔乐社区下载数据集和模型。
+
+```bash
+export USE_OPENMIND_HUB=1 # Windows 使用 `set USE_OPENMIND_HUB=1`
+```
+
+将 `model_name_or_path` 设置为模型 ID 来加载对应的模型。在[魔乐社区](https://modelers.cn/models)查看所有可用的模型,例如 `TeleAI/TeleChat-7B-pt`。
+
 ### 使用 W&B 面板
 
 若要使用 [Weights & Biases](https://wandb.ai) 记录实验数据,请在 yaml 文件中添加下面的参数。
data/dataset_info.json
@@ -54,7 +54,8 @@
   },
   "alpaca_en": {
     "hf_hub_url": "llamafactory/alpaca_en",
-    "ms_hub_url": "llamafactory/alpaca_en"
+    "ms_hub_url": "llamafactory/alpaca_en",
+    "om_hub_url": "HaM/alpaca_en"
   },
   "alpaca_zh": {
     "hf_hub_url": "llamafactory/alpaca_zh",
@@ -66,7 +67,8 @@
   },
   "alpaca_gpt4_zh": {
     "hf_hub_url": "llamafactory/alpaca_gpt4_zh",
-    "ms_hub_url": "llamafactory/alpaca_gpt4_zh"
+    "ms_hub_url": "llamafactory/alpaca_gpt4_zh",
+    "om_hub_url": "State_Cloud/alpaca-gpt4-data-zh"
   },
   "glaive_toolcall_en": {
     "hf_hub_url": "llamafactory/glaive_toolcall_en",
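A custom dataset entry can carry all three hub keys side by side; a hedged sketch of the shape this change enables (the entry name "my_dataset" is hypothetical, repo IDs are taken from the diff, and the resolution order follows `get_dataset_list` further down):

```python
# Sketch: one dataset_info.json entry with per-hub IDs after this commit.
import json

entry = {
    "my_dataset": {
        "hf_hub_url": "llamafactory/alpaca_en",  # default source
        "ms_hub_url": "llamafactory/alpaca_en",  # preferred when USE_MODELSCOPE_HUB=1
        "om_hub_url": "HaM/alpaca_en",           # preferred when USE_OPENMIND_HUB=1
    }
}
print(json.dumps(entry, indent=2))
```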
docker/docker-cuda/docker-compose.yml
@@ -16,6 +16,7 @@ services:
     volumes:
       - ../../hf_cache:/root/.cache/huggingface
       - ../../ms_cache:/root/.cache/modelscope
+      - ../../om_cache:/root/.cache/openmind
       - ../../data:/app/data
       - ../../output:/app/output
     ports:
docker/docker-npu/docker-compose.yml
@@ -10,6 +10,7 @@ services:
     volumes:
       - ../../hf_cache:/root/.cache/huggingface
       - ../../ms_cache:/root/.cache/modelscope
+      - ../../om_cache:/root/.cache/openmind
       - ../../data:/app/data
       - ../../output:/app/output
       - /usr/local/dcmi:/usr/local/dcmi
docker/docker-rocm/docker-compose.yml
@@ -15,6 +15,7 @@ services:
     volumes:
       - ../../hf_cache:/root/.cache/huggingface
       - ../../ms_cache:/root/.cache/modelscope
+      - ../../om_cache:/root/.cache/openmind
       - ../../data:/app/data
       - ../../output:/app/output
       - ../../saves:/app/saves
requirements.txt
@@ -3,7 +3,7 @@ datasets>=2.16.0,<=2.21.0
 accelerate>=0.30.1,<=0.34.2
 peft>=0.11.1,<=0.12.0
 trl>=0.8.6,<=0.9.6
-gradio>=4.0.0
+gradio>=4.0.0,<5.0.0
 pandas>=2.0.0
 scipy
 einops
setup.py (3 changed lines)
@@ -54,12 +54,13 @@ extra_require = {
     "gptq": ["optimum>=1.17.0", "auto-gptq>=0.5.0"],
     "awq": ["autoawq"],
     "aqlm": ["aqlm[gpu]>=1.1.0"],
-    "vllm": ["vllm>=0.4.3,<=0.6.2"],
+    "vllm": ["vllm>=0.4.3,<=0.6.3"],
     "galore": ["galore-torch"],
     "badam": ["badam>=1.2.1"],
     "adam-mini": ["adam-mini"],
     "qwen": ["transformers_stream_generator"],
     "modelscope": ["modelscope"],
+    "openmind": ["openmind"],
     "dev": ["ruff", "pytest"],
 }
 
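With the new extra in place, `pip install -e ".[torch,metrics,openmind]"` pulls in the openMind client. A quick, hedged way to confirm which optional hub backends resolved in the current environment (package names from setup.py):

```python
# Sketch: check whether the optional hub backends are importable.
import importlib.util

for pkg in ("modelscope", "openmind"):
    spec = importlib.util.find_spec(pkg)
    print(f"{pkg}: {'available' if spec else 'not installed'}")
```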
src/llamafactory/cli.py
@@ -38,6 +38,7 @@ Force check imports: FORCE_CHECK_IMPORTS=1
 Force using torchrun: FORCE_TORCHRUN=1
 Set logging verbosity: LLAMAFACTORY_VERBOSITY=WARN
 Use modelscope: USE_MODELSCOPE_HUB=1
+Use openmind: USE_OPENMIND_HUB=1
 """
 
 from .extras.env import VERSION
src/llamafactory/data/loader.py
@@ -53,7 +53,7 @@ def _load_single_dataset(
     """
     logger.info("Loading dataset {}...".format(dataset_attr))
     data_path, data_name, data_dir, data_files = None, None, None, None
-    if dataset_attr.load_from in ["hf_hub", "ms_hub"]:
+    if dataset_attr.load_from in ["hf_hub", "ms_hub", "om_hub"]:
         data_path = dataset_attr.dataset_name
         data_name = dataset_attr.subset
         data_dir = dataset_attr.folder
@@ -102,6 +102,23 @@ def _load_single_dataset(
         )
         if isinstance(dataset, MsDataset):
             dataset = dataset.to_hf_dataset()
+
+    elif dataset_attr.load_from == "om_hub":
+        require_version("openmind>=0.8.0", "To fix: pip install openmind>=0.8.0")
+        from openmind import OmDataset
+        from openmind.utils.hub import OM_DATASETS_CACHE
+
+        cache_dir = model_args.cache_dir or OM_DATASETS_CACHE
+        dataset = OmDataset.load_dataset(
+            path=data_path,
+            name=data_name,
+            data_dir=data_dir,
+            data_files=data_files,
+            split=dataset_attr.split,
+            cache_dir=cache_dir,
+            token=model_args.om_hub_token,
+            streaming=(data_args.streaming and (dataset_attr.load_from != "file")),
+        )
     else:
         dataset = load_dataset(
             path=data_path,
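The openMind call the new `om_hub` branch relies on can also be exercised standalone; a hedged sketch under the diff's own assumptions (`openmind>=0.8.0` installed, `HaM/alpaca_en` taken from `data/dataset_info.json`, and `split="train"` assumed as the default split):

```python
# Sketch: direct use of the OmDataset API wrapped by _load_single_dataset.
from openmind import OmDataset  # gated by require_version("openmind>=0.8.0") above

dataset = OmDataset.load_dataset(path="HaM/alpaca_en", split="train")
print(len(dataset), dataset[0])  # HF-style dataset, same as the ms_hub branch yields
```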
src/llamafactory/data/parser.py
@@ -20,7 +20,7 @@ from typing import Any, Dict, List, Literal, Optional, Sequence
 from transformers.utils import cached_file
 
 from ..extras.constants import DATA_CONFIG
-from ..extras.misc import use_modelscope
+from ..extras.misc import use_modelscope, use_openmind
 
 
 @dataclass
@@ -30,7 +30,7 @@ class DatasetAttr:
     """
 
     # basic configs
-    load_from: Literal["hf_hub", "ms_hub", "script", "file"]
+    load_from: Literal["hf_hub", "ms_hub", "om_hub", "script", "file"]
     dataset_name: str
     formatting: Literal["alpaca", "sharegpt"] = "alpaca"
     ranking: bool = False
@@ -98,7 +98,12 @@ def get_dataset_list(dataset_names: Optional[Sequence[str]], dataset_dir: str) -
     dataset_list: List["DatasetAttr"] = []
     for name in dataset_names:
         if dataset_info is None:  # dataset_dir is ONLINE
-            load_from = "ms_hub" if use_modelscope() else "hf_hub"
+            if use_modelscope():
+                load_from = "ms_hub"
+            elif use_openmind():
+                load_from = "om_hub"
+            else:
+                load_from = "hf_hub"
             dataset_attr = DatasetAttr(load_from, dataset_name=name)
             dataset_list.append(dataset_attr)
             continue
@@ -108,10 +113,13 @@ def get_dataset_list(dataset_names: Optional[Sequence[str]], dataset_dir: str) -
 
         has_hf_url = "hf_hub_url" in dataset_info[name]
         has_ms_url = "ms_hub_url" in dataset_info[name]
+        has_om_url = "om_hub_url" in dataset_info[name]
 
-        if has_hf_url or has_ms_url:
-            if (use_modelscope() and has_ms_url) or (not has_hf_url):
+        if has_hf_url or has_ms_url or has_om_url:
+            if has_ms_url and (use_modelscope() or not has_hf_url):
                 dataset_attr = DatasetAttr("ms_hub", dataset_name=dataset_info[name]["ms_hub_url"])
+            elif has_om_url and (use_openmind() or not has_hf_url):
+                dataset_attr = DatasetAttr("om_hub", dataset_name=dataset_info[name]["om_hub_url"])
             else:
                 dataset_attr = DatasetAttr("hf_hub", dataset_name=dataset_info[name]["hf_hub_url"])
         elif "script_url" in dataset_info[name]:
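Restating the selection rules `get_dataset_list` now implements, as standalone code (`pick_source` is a hypothetical helper, not in the repo): an explicit opt-in via the environment, or a missing `hf_hub_url`, promotes the alternative hub, with ModelScope checked before openMind; otherwise Hugging Face stays the default.

```python
# Hypothetical helper mirroring the new precedence in get_dataset_list().
import os

def pick_source(entry: dict) -> tuple:
    use_ms = os.environ.get("USE_MODELSCOPE_HUB", "0").lower() in ["true", "1"]
    use_om = os.environ.get("USE_OPENMIND_HUB", "0").lower() in ["true", "1"]
    has_hf = "hf_hub_url" in entry
    if "ms_hub_url" in entry and (use_ms or not has_hf):
        return ("ms_hub", entry["ms_hub_url"])
    if "om_hub_url" in entry and (use_om or not has_hf):
        return ("om_hub", entry["om_hub_url"])
    return ("hf_hub", entry["hf_hub_url"])

os.environ["USE_OPENMIND_HUB"] = "1"
print(pick_source({"hf_hub_url": "llamafactory/alpaca_en", "om_hub_url": "HaM/alpaca_en"}))
# -> ('om_hub', 'HaM/alpaca_en')
```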
src/llamafactory/extras/constants.py
@@ -107,6 +107,7 @@ VISION_MODELS = set()
 class DownloadSource(str, Enum):
     DEFAULT = "hf"
     MODELSCOPE = "ms"
+    OPENMIND = "om"
 
 
 def register_model_group(
@@ -163,14 +164,17 @@ register_model_group(
         "Baichuan2-13B-Base": {
             DownloadSource.DEFAULT: "baichuan-inc/Baichuan2-13B-Base",
             DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-13B-Base",
+            DownloadSource.OPENMIND: "Baichuan/Baichuan2_13b_base_pt",
         },
         "Baichuan2-7B-Chat": {
             DownloadSource.DEFAULT: "baichuan-inc/Baichuan2-7B-Chat",
             DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-7B-Chat",
+            DownloadSource.OPENMIND: "Baichuan/Baichuan2_7b_chat_pt",
         },
         "Baichuan2-13B-Chat": {
             DownloadSource.DEFAULT: "baichuan-inc/Baichuan2-13B-Chat",
             DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-13B-Chat",
+            DownloadSource.OPENMIND: "Baichuan/Baichuan2_13b_chat_pt",
         },
     },
     template="baichuan2",
@@ -555,10 +559,12 @@ register_model_group(
         "Gemma-2-2B-Instruct": {
             DownloadSource.DEFAULT: "google/gemma-2-2b-it",
             DownloadSource.MODELSCOPE: "LLM-Research/gemma-2-2b-it",
+            DownloadSource.OPENMIND: "LlamaFactory/gemma-2-2b-it",
         },
         "Gemma-2-9B-Instruct": {
             DownloadSource.DEFAULT: "google/gemma-2-9b-it",
             DownloadSource.MODELSCOPE: "LLM-Research/gemma-2-9b-it",
+            DownloadSource.OPENMIND: "LlamaFactory/gemma-2-9b-it",
         },
         "Gemma-2-27B-Instruct": {
             DownloadSource.DEFAULT: "google/gemma-2-27b-it",
@@ -578,6 +584,7 @@ register_model_group(
         "GLM-4-9B-Chat": {
             DownloadSource.DEFAULT: "THUDM/glm-4-9b-chat",
             DownloadSource.MODELSCOPE: "ZhipuAI/glm-4-9b-chat",
+            DownloadSource.OPENMIND: "LlamaFactory/glm-4-9b-chat",
         },
         "GLM-4-9B-1M-Chat": {
             DownloadSource.DEFAULT: "THUDM/glm-4-9b-chat-1m",
@@ -632,6 +639,7 @@ register_model_group(
         "InternLM2.5-1.8B": {
             DownloadSource.DEFAULT: "internlm/internlm2_5-1_8b",
             DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm2_5-1_8b",
+            DownloadSource.OPENMIND: "Intern/internlm2_5-1_8b",
         },
         "InternLM2.5-7B": {
             DownloadSource.DEFAULT: "internlm/internlm2_5-7b",
@@ -640,22 +648,27 @@ register_model_group(
         "InternLM2.5-20B": {
             DownloadSource.DEFAULT: "internlm/internlm2_5-20b",
             DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm2_5-20b",
+            DownloadSource.OPENMIND: "Intern/internlm2_5-20b",
         },
         "InternLM2.5-1.8B-Chat": {
             DownloadSource.DEFAULT: "internlm/internlm2_5-1_8b-chat",
             DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm2_5-1_8b-chat",
+            DownloadSource.OPENMIND: "Intern/internlm2_5-1_8b-chat",
         },
         "InternLM2.5-7B-Chat": {
             DownloadSource.DEFAULT: "internlm/internlm2_5-7b-chat",
             DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm2_5-7b-chat",
+            DownloadSource.OPENMIND: "Intern/internlm2_5-7b-chat",
         },
         "InternLM2.5-7B-1M-Chat": {
             DownloadSource.DEFAULT: "internlm/internlm2_5-7b-chat-1m",
             DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm2_5-7b-chat-1m",
+            DownloadSource.OPENMIND: "Intern/internlm2_5-7b-chat-1m",
         },
         "InternLM2.5-20B-Chat": {
             DownloadSource.DEFAULT: "internlm/internlm2_5-20b-chat",
             DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm2_5-20b-chat",
+            DownloadSource.OPENMIND: "Intern/internlm2_5-20b-chat",
         },
     },
     template="intern2",
@@ -756,6 +769,7 @@ register_model_group(
         "Llama-3-8B-Chinese-Chat": {
             DownloadSource.DEFAULT: "shenzhi-wang/Llama3-8B-Chinese-Chat",
             DownloadSource.MODELSCOPE: "LLM-Research/Llama3-8B-Chinese-Chat",
+            DownloadSource.OPENMIND: "LlamaFactory/Llama3-Chinese-8B-Instruct",
         },
         "Llama-3-70B-Chinese-Chat": {
             DownloadSource.DEFAULT: "shenzhi-wang/Llama3-70B-Chinese-Chat",
@@ -960,6 +974,7 @@ register_model_group(
         "MiniCPM3-4B-Chat": {
             DownloadSource.DEFAULT: "openbmb/MiniCPM3-4B",
             DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM3-4B",
+            DownloadSource.OPENMIND: "LlamaFactory/MiniCPM3-4B",
         },
     },
     template="cpm3",
@@ -1421,14 +1436,17 @@ register_model_group(
         "Qwen2-0.5B-Instruct": {
             DownloadSource.DEFAULT: "Qwen/Qwen2-0.5B-Instruct",
             DownloadSource.MODELSCOPE: "qwen/Qwen2-0.5B-Instruct",
+            DownloadSource.OPENMIND: "LlamaFactory/Qwen2-0.5B-Instruct",
         },
         "Qwen2-1.5B-Instruct": {
             DownloadSource.DEFAULT: "Qwen/Qwen2-1.5B-Instruct",
             DownloadSource.MODELSCOPE: "qwen/Qwen2-1.5B-Instruct",
+            DownloadSource.OPENMIND: "LlamaFactory/Qwen2-1.5B-Instruct",
         },
         "Qwen2-7B-Instruct": {
             DownloadSource.DEFAULT: "Qwen/Qwen2-7B-Instruct",
             DownloadSource.MODELSCOPE: "qwen/Qwen2-7B-Instruct",
+            DownloadSource.OPENMIND: "LlamaFactory/Qwen2-7B-Instruct",
         },
         "Qwen2-72B-Instruct": {
             DownloadSource.DEFAULT: "Qwen/Qwen2-72B-Instruct",
@@ -1711,10 +1729,12 @@ register_model_group(
         "Qwen2-VL-2B-Instruct": {
             DownloadSource.DEFAULT: "Qwen/Qwen2-VL-2B-Instruct",
             DownloadSource.MODELSCOPE: "qwen/Qwen2-VL-2B-Instruct",
+            DownloadSource.OPENMIND: "LlamaFactory/Qwen2-VL-2B-Instruct",
         },
         "Qwen2-VL-7B-Instruct": {
             DownloadSource.DEFAULT: "Qwen/Qwen2-VL-7B-Instruct",
             DownloadSource.MODELSCOPE: "qwen/Qwen2-VL-7B-Instruct",
+            DownloadSource.OPENMIND: "LlamaFactory/Qwen2-VL-7B-Instruct",
         },
         "Qwen2-VL-72B-Instruct": {
             DownloadSource.DEFAULT: "Qwen/Qwen2-VL-72B-Instruct",
@@ -1813,10 +1833,12 @@ register_model_group(
         "TeleChat-7B-Chat": {
             DownloadSource.DEFAULT: "Tele-AI/telechat-7B",
             DownloadSource.MODELSCOPE: "TeleAI/telechat-7B",
+            DownloadSource.OPENMIND: "TeleAI/TeleChat-7B-pt",
         },
         "TeleChat-12B-Chat": {
             DownloadSource.DEFAULT: "Tele-AI/TeleChat-12B",
             DownloadSource.MODELSCOPE: "TeleAI/TeleChat-12B",
+            DownloadSource.OPENMIND: "TeleAI/TeleChat-12B-pt",
         },
         "TeleChat-12B-v2-Chat": {
             DownloadSource.DEFAULT: "Tele-AI/TeleChat-12B-v2",
@@ -2035,6 +2057,7 @@ register_model_group(
         "Yi-1.5-6B-Chat": {
             DownloadSource.DEFAULT: "01-ai/Yi-1.5-6B-Chat",
             DownloadSource.MODELSCOPE: "01ai/Yi-1.5-6B-Chat",
+            DownloadSource.OPENMIND: "LlamaFactory/Yi-1.5-6B-Chat",
        },
         "Yi-1.5-9B-Chat": {
             DownloadSource.DEFAULT: "01-ai/Yi-1.5-9B-Chat",
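Each `register_model_group` entry is just a `DownloadSource`-keyed dict, so hub selection reduces to a plain lookup; a self-contained sketch with the enum values and one mapping copied from the diff:

```python
# Sketch: per-hub model IDs as registered in constants.py (values from the diff).
from enum import Enum

class DownloadSource(str, Enum):
    DEFAULT = "hf"
    MODELSCOPE = "ms"
    OPENMIND = "om"

glm4_9b_chat = {
    DownloadSource.DEFAULT: "THUDM/glm-4-9b-chat",
    DownloadSource.MODELSCOPE: "ZhipuAI/glm-4-9b-chat",
    DownloadSource.OPENMIND: "LlamaFactory/glm-4-9b-chat",
}
print(glm4_9b_chat[DownloadSource.OPENMIND])  # -> LlamaFactory/glm-4-9b-chat
```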
src/llamafactory/extras/misc.py
@@ -231,18 +231,35 @@ def torch_gc() -> None:
         torch.cuda.empty_cache()
 
 
-def try_download_model_from_ms(model_args: "ModelArguments") -> str:
-    if not use_modelscope() or os.path.exists(model_args.model_name_or_path):
+def try_download_model_from_other_hub(model_args: "ModelArguments") -> str:
+    if (not use_modelscope() and not use_openmind()) or os.path.exists(model_args.model_name_or_path):
         return model_args.model_name_or_path
 
-    try:
+    if use_modelscope():
+        require_version("modelscope>=1.11.0", "To fix: pip install modelscope>=1.11.0")
         from modelscope import snapshot_download
 
         revision = "master" if model_args.model_revision == "main" else model_args.model_revision
-        return snapshot_download(model_args.model_name_or_path, revision=revision, cache_dir=model_args.cache_dir)
-    except ImportError:
-        raise ImportError("Please install modelscope via `pip install modelscope -U`")
+        return snapshot_download(
+            model_args.model_name_or_path,
+            revision=revision,
+            cache_dir=model_args.cache_dir,
+        )
+
+    if use_openmind():
+        require_version("openmind>=0.8.0", "To fix: pip install openmind>=0.8.0")
+        from openmind.utils.hub import snapshot_download
+
+        return snapshot_download(
+            model_args.model_name_or_path,
+            revision=model_args.model_revision,
+            cache_dir=model_args.cache_dir,
+        )
 
 
 def use_modelscope() -> bool:
     return os.environ.get("USE_MODELSCOPE_HUB", "0").lower() in ["true", "1"]
+
+
+def use_openmind() -> bool:
+    return os.environ.get("USE_OPENMIND_HUB", "0").lower() in ["true", "1"]
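A hedged usage sketch of the renamed helper; it assumes `ModelArguments` is re-exported from `llamafactory.hparams` (its usual export location in this code base) and that a Modelers-side model ID from the diff is passed:

```python
# Sketch: resolve a hub model ID to a local snapshot path via the new helper.
import os

os.environ["USE_OPENMIND_HUB"] = "1"

from llamafactory.extras.misc import try_download_model_from_other_hub
from llamafactory.hparams import ModelArguments  # assumed export location

args = ModelArguments(model_name_or_path="TeleAI/TeleChat-7B-pt")
print(try_download_model_from_other_hub(args))  # local cache path after download
```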
src/llamafactory/hparams/model_args.py
@@ -267,6 +267,10 @@ class ModelArguments(QuantizationArguments, ProcessorArguments, ExportArguments,
         default=None,
         metadata={"help": "Auth token to log in with ModelScope Hub."},
     )
+    om_hub_token: Optional[str] = field(
+        default=None,
+        metadata={"help": "Auth token to log in with Modelers Hub."},
+    )
     print_param_status: bool = field(
         default=False,
         metadata={"help": "For debugging purposes, print the status of the parameters in the model."},
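Since `ModelArguments` is a dataclass, the new field is settable like any other argument; a small sketch (the token value is a placeholder, and the export location is an assumption as above):

```python
# Sketch: om_hub_token sits next to ms_hub_token and is forwarded to
# OmDataset.load_dataset(token=...) in data/loader.py.
from llamafactory.hparams import ModelArguments  # assumed export location

args = ModelArguments(
    model_name_or_path="TeleAI/TeleChat-7B-pt",
    om_hub_token="om_xxxxxxxx",  # placeholder token for private Modelers repos
)
print(args.om_hub_token is not None)
```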
src/llamafactory/hparams/parser.py
@@ -123,7 +123,7 @@ def _check_extra_dependencies(
         require_version("mixture-of-depth>=1.1.6", "To fix: pip install mixture-of-depth>=1.1.6")
 
     if model_args.infer_backend == "vllm":
-        require_version("vllm>=0.4.3,<=0.6.2", "To fix: pip install vllm>=0.4.3,<=0.6.2")
+        require_version("vllm>=0.4.3,<=0.6.3", "To fix: pip install vllm>=0.4.3,<=0.6.3")
 
     if finetuning_args.use_galore:
         require_version("galore_torch", "To fix: pip install galore_torch")
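The same version gate can be invoked directly to see what the bumped ceiling accepts (the helper ships with transformers):

```python
# Sketch: the check _check_extra_dependencies runs for the vLLM backend.
from transformers.utils.versions import require_version

# Raises ImportError unless the installed vllm satisfies >=0.4.3,<=0.6.3.
require_version("vllm>=0.4.3,<=0.6.3", "To fix: pip install vllm>=0.4.3,<=0.6.3")
```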
src/llamafactory/model/loader.py
@@ -19,7 +19,7 @@ from transformers import AutoConfig, AutoModelForCausalLM, AutoModelForVision2Se
 from trl import AutoModelForCausalLMWithValueHead
 
 from ..extras.logging import get_logger
-from ..extras.misc import count_parameters, skip_check_imports, try_download_model_from_ms
+from ..extras.misc import count_parameters, skip_check_imports, try_download_model_from_other_hub
 from .adapter import init_adapter
 from .model_utils.liger_kernel import apply_liger_kernel
 from .model_utils.misc import register_autoclass
@@ -50,7 +50,7 @@ def _get_init_kwargs(model_args: "ModelArguments") -> Dict[str, Any]:
     Note: including inplace operation of model_args.
     """
     skip_check_imports()
-    model_args.model_name_or_path = try_download_model_from_ms(model_args)
+    model_args.model_name_or_path = try_download_model_from_other_hub(model_args)
     return {
         "trust_remote_code": True,
         "cache_dir": model_args.cache_dir,
src/llamafactory/webui/common.py
@@ -31,7 +31,7 @@ from ..extras.constants import (
     DownloadSource,
 )
 from ..extras.logging import get_logger
-from ..extras.misc import use_modelscope
+from ..extras.misc import use_modelscope, use_openmind
 from ..extras.packages import is_gradio_available
 
 
@@ -109,9 +109,16 @@ def get_model_path(model_name: str) -> str:
         use_modelscope()
         and path_dict.get(DownloadSource.MODELSCOPE)
         and model_path == path_dict.get(DownloadSource.DEFAULT)
-    ):  # replace path
+    ):  # replace hf path with ms path
         model_path = path_dict.get(DownloadSource.MODELSCOPE)
 
+    if (
+        use_openmind()
+        and path_dict.get(DownloadSource.OPENMIND)
+        and model_path == path_dict.get(DownloadSource.DEFAULT)
+    ):  # replace hf path with om path
+        model_path = path_dict.get(DownloadSource.OPENMIND)
+
     return model_path
 
 
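A plain restatement (not the webui code) of the path-replacement rule this hunk extends: an alternative hub ID is substituted only when the user opted in and the current path is still the Hugging Face default, so a ModelScope substitution also blocks a later openMind one.

```python
# Hypothetical mirror of get_model_path's selection logic, keyed by short names.
def resolve(path_dict: dict, model_path: str, use_ms: bool, use_om: bool) -> str:
    default = path_dict.get("hf")
    if use_ms and path_dict.get("ms") and model_path == default:
        model_path = path_dict["ms"]  # replace hf path with ms path
    if use_om and path_dict.get("om") and model_path == default:
        model_path = path_dict["om"]  # replace hf path with om path
    return model_path

print(resolve({"hf": "google/gemma-2-9b-it", "om": "LlamaFactory/gemma-2-9b-it"},
              "google/gemma-2-9b-it", use_ms=False, use_om=True))
# -> LlamaFactory/gemma-2-9b-it
```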