diff --git a/README.md b/README.md
index e2b10f3a..7a4c8ee1 100644
--- a/README.md
+++ b/README.md
@@ -51,6 +51,8 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 ## Changelog
 
+[23/12/01] We supported **[ModelScope Hub](https://www.modelscope.cn/models)** to accelerate model downloading. Set the environment variable `USE_MODELSCOPE_HUB=1` before launching, then you can use ModelScope Hub model IDs.
+
 [23/10/21] We supported **[NEFTune](https://arxiv.org/abs/2310.05914)** trick for fine-tuning. Try `--neft_alpha` argument to activate NEFTune, e.g., `--neft_alpha 5`.
 
 [23/09/27] We supported **$S^2$-Attn** proposed by [LongLoRA](https://github.com/dvlab-research/LongLoRA) for the LLaMA models. Try `--shift_attn` argument to enable shift short attention.
@@ -229,6 +231,33 @@ If you want to enable the quantized LoRA (QLoRA) on the Windows platform, you wi
 pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.39.1-py3-none-win_amd64.whl
 ```
 
+### Use ModelScope Models
+
+If you have trouble downloading models from HuggingFace, you can use the ModelScope Hub instead. To use LLaMA-Factory together with ModelScope, set an environment variable:
+
+```shell
+export USE_MODELSCOPE_HUB=1
+```
+
+> [!NOTE]
+>
+> This variable accepts integers only: `0` or unset selects the HuggingFace Hub, and any other integer selects the ModelScope Hub.
+
+Then you can use LLaMA-Factory with ModelScope model IDs:
+
+```shell
+python src/train_bash.py \
+    --model_name_or_path ZhipuAI/chatglm3-6b \
+    ... other arguments
+# You can find all available model IDs at: https://www.modelscope.cn/models
+```
+
+The Web demo also supports ModelScope. After setting the environment variable, launch it with:
+
+```shell
+CUDA_VISIBLE_DEVICES=0 python src/train_web.py
+```
+
 ### Train on a single GPU
 
 > [!IMPORTANT]
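The integers-only note above reflects how the flag is parsed: the patch reads the variable with `int(os.environ.get(...))`, so a value such as `true` would raise `ValueError` rather than enable ModelScope. A minimal sketch of the gating check used throughout this patch (the helper name `use_modelscope_hub` is illustrative, not part of the patch):

```python
import os

def use_modelscope_hub() -> bool:
    # 0 or unset selects the HuggingFace Hub; any other integer selects ModelScope.
    # int() raises ValueError for non-integer values, hence the integers-only note.
    return bool(int(os.environ.get("USE_MODELSCOPE_HUB", "0")))
```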
diff --git a/README_zh.md b/README_zh.md
index 9e3c0833..6a68ce30 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -51,6 +51,8 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/6ba60acc-e2e2-4bec-b846
 ## 更新日志
 
+[23/12/01] 我们支持了 **[魔搭 ModelHub](https://www.modelscope.cn/models)** 进行模型下载加速。在启动命令前设置环境变量 `USE_MODELSCOPE_HUB=1` 即可开启,之后即可使用魔搭的模型 ID。
+
 [23/10/21] 我们支持了 **[NEFTune](https://arxiv.org/abs/2310.05914)** 训练技巧。请使用 `--neft_alpha` 参数启用 NEFTune,例如 `--neft_alpha 5`。
 
 [23/09/27] 我们针对 LLaMA 模型支持了 [LongLoRA](https://github.com/dvlab-research/LongLoRA) 提出的 **$S^2$-Attn**。请使用 `--shift_attn` 参数以启用该功能。
@@ -229,6 +231,33 @@ pip install -r requirements.txt
 pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.39.1-py3-none-win_amd64.whl
 ```
 
+### 使用魔搭的模型
+
+如果从 HuggingFace 下载模型时遇到问题,可以改用魔搭 ModelHub。只需设置一个环境变量:
+
+```shell
+export USE_MODELSCOPE_HUB=1
+```
+
+> [!NOTE]
+>
+> 该环境变量仅支持整数:设为 0 或不设置代表使用 HuggingFace Hub,其他整数代表使用 ModelScope Hub。
+
+之后就可以在命令行中指定魔搭的模型 ID:
+
+```shell
+python src/train_bash.py \
+    --model_name_or_path ZhipuAI/chatglm3-6b \
+    ... other arguments
+# 在这个链接中可以看到所有可用模型:https://www.modelscope.cn/models
+```
+
+Web demo 目前也支持魔搭,设置环境变量后即可使用:
+
+```shell
+CUDA_VISIBLE_DEVICES=0 python src/train_web.py
+```
+
 ### 单 GPU 训练
 
 > [!IMPORTANT]
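The `constants.py` changes below register every model under both hubs. Condensed, the selection rule applied at import time is the following (`resolve_path` is an illustrative name; the patch inlines this logic in `register_model_group`):

```python
from typing import Dict, Optional, Union

def resolve_path(path: Union[str, Dict[str, str]], use_ms: bool) -> Optional[str]:
    if not use_ms:
        # A plain string is treated as a HuggingFace model ID by default.
        return path["hf"] if isinstance(path, dict) else path
    if isinstance(path, dict) and "ms" in path:
        return path["ms"]
    return None  # no ModelScope ID registered: the model is omitted
```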
"AI-ModelScope/bloom-7b1", + } }, module="query_key_value" ) @@ -82,9 +121,18 @@ register_model_group( register_model_group( models={ - "BLOOMZ-560M": "bigscience/bloomz-560m", - "BLOOMZ-3B": "bigscience/bloomz-3b", - "BLOOMZ-7B1-mt": "bigscience/bloomz-7b1-mt" + "BLOOMZ-560M": { + "hf": "bigscience/bloomz-560m", + "ms": "AI-ModelScope/bloomz-560m", + }, + "BLOOMZ-3B": { + "hf": "bigscience/bloomz-3b", + "ms": "AI-ModelScope/bloomz-3b", + }, + "BLOOMZ-7B1-mt": { + "hf": "bigscience/bloomz-7b1-mt", + "ms": "AI-ModelScope/bloomz-7b1-mt", + } }, module="query_key_value" ) @@ -92,8 +140,14 @@ register_model_group( register_model_group( models={ - "BlueLM-7B-Base": "vivo-ai/BlueLM-7B-Base", - "BlueLM-7B-Chat": "vivo-ai/BlueLM-7B-Chat" + "BlueLM-7B-Base": { + "hf": "vivo-ai/BlueLM-7B-Base", + "ms": "vivo-ai/BlueLM-7B-Base", + }, + "BlueLM-7B-Chat": { + "hf": "vivo-ai/BlueLM-7B-Chat", + "ms": "vivo-ai/BlueLM-7B-Chat", + } }, template="bluelm" ) @@ -101,7 +155,10 @@ register_model_group( register_model_group( models={ - "ChatGLM2-6B-Chat": "THUDM/chatglm2-6b" + "ChatGLM2-6B-Chat": { + "hf": "THUDM/chatglm2-6b", + "ms": "ZhipuAI/chatglm2-6b", + } }, module="query_key_value", template="chatglm2" @@ -110,8 +167,14 @@ register_model_group( register_model_group( models={ - "ChatGLM3-6B-Base": "THUDM/chatglm3-6b-base", - "ChatGLM3-6B-Chat": "THUDM/chatglm3-6b" + "ChatGLM3-6B-Base": { + "hf": "THUDM/chatglm3-6b-base", + "ms": "ZhipuAI/chatglm3-6b-base", + }, + "ChatGLM3-6B-Chat": { + "hf": "THUDM/chatglm3-6b", + "ms": "ZhipuAI/chatglm3-6b", + } }, module="query_key_value", template="chatglm3" @@ -120,12 +183,30 @@ register_model_group( register_model_group( models={ - "ChineseLLaMA2-1.3B": "hfl/chinese-llama-2-1.3b", - "ChineseLLaMA2-7B": "hfl/chinese-llama-2-7b", - "ChineseLLaMA2-13B": "hfl/chinese-llama-2-13b", - "ChineseLLaMA2-1.3B-Chat": "hfl/chinese-alpaca-2-1.3b", - "ChineseLLaMA2-7B-Chat": "hfl/chinese-alpaca-2-7b", - "ChineseLLaMA2-13B-Chat": "hfl/chinese-alpaca-2-13b" + "ChineseLLaMA2-1.3B": { + "hf": "hfl/chinese-llama-2-1.3b", + "ms": "AI-ModelScope/chinese-llama-2-1.3b", + }, + "ChineseLLaMA2-7B": { + "hf": "hfl/chinese-llama-2-7b", + "ms": "AI-ModelScope/chinese-llama-2-7b", + }, + "ChineseLLaMA2-13B": { + "hf": "hfl/chinese-llama-2-13b", + "ms": "AI-ModelScope/chinese-llama-2-13b", + }, + "ChineseLLaMA2-1.3B-Chat": { + "hf": "hfl/chinese-alpaca-2-1.3b", + "ms": "AI-ModelScope/chinese-alpaca-2-1.3b", + }, + "ChineseLLaMA2-7B-Chat": { + "hf": "hfl/chinese-alpaca-2-7b", + "ms": "AI-ModelScope/chinese-alpaca-2-7b", + }, + "ChineseLLaMA2-13B-Chat": { + "hf": "hfl/chinese-alpaca-2-13b", + "ms": "AI-ModelScope/chinese-alpaca-2-13b", + } }, template="llama2_zh" ) @@ -133,34 +214,30 @@ register_model_group( register_model_group( models={ - "DeepseekLLM-7B-Base": "deepseek-ai/deepseek-llm-7b-base", - "DeepseekLLM-67B-Base": "deepseek-ai/deepseek-llm-67b-base", - "DeepseekLLM-7B-Chat": "deepseek-ai/deepseek-llm-7b-chat", - "DeepseekLLM-67B-Chat": "deepseek-ai/deepseek-llm-67b-chat" - }, - template="deepseek" -) - - -register_model_group( - models={ - "DeepseekCoder-6.7B-Base": "deepseek-ai/deepseek-coder-6.7b-base", - "DeepseekCoder-6.7B-Chat": "deepseek-ai/deepseek-coder-6.7b-instruct", - "DeepseekCoder-33B-Base": "deepseek-ai/deepseek-coder-33b-base", - "DeepseekCoder-33B-Chat": "deepseek-ai/deepseek-coder-33b-instruct" - }, - template="deepseekcoder" -) - - -register_model_group( - models={ - "Falcon-7B": "tiiuae/falcon-7b", - "Falcon-40B": "tiiuae/falcon-40b", - "Falcon-180B": 
"tiiuae/falcon-180B", - "Falcon-7B-Chat": "tiiuae/falcon-7b-instruct", - "Falcon-40B-Chat": "tiiuae/falcon-40b-instruct", - "Falcon-180B-Chat": "tiiuae/falcon-180B-chat" + "Falcon-7B": { + "hf": "tiiuae/falcon-7b", + "ms": "AI-ModelScope/falcon-7b", + }, + "Falcon-40B": { + "hf": "tiiuae/falcon-40b", + "ms": "AI-ModelScope/falcon-40b", + }, + "Falcon-180B": { + "hf": "tiiuae/falcon-180B", + "ms": "AI-ModelScope/falcon-180B", + }, + "Falcon-7B-Chat": { + "hf": "tiiuae/falcon-7b-instruct", + "ms": "AI-ModelScope/falcon-7b-instruct", + }, + "Falcon-40B-Chat": { + "hf": "tiiuae/falcon-40b-instruct", + "ms": "AI-ModelScope/falcon-40b-instruct", + }, + "Falcon-180B-Chat": { + "hf": "tiiuae/falcon-180B-chat", + "ms": "AI-ModelScope/falcon-180B-chat", + } }, module="query_key_value", template="falcon" @@ -169,10 +246,22 @@ register_model_group( register_model_group( models={ - "InternLM-7B": "internlm/internlm-7b", - "InternLM-20B": "internlm/internlm-20b", - "InternLM-7B-Chat": "internlm/internlm-chat-7b", - "InternLM-20B-Chat": "internlm/internlm-chat-20b" + "InternLM-7B": { + "hf": "internlm/internlm-7b", + "ms": "Shanghai_AI_Laboratory/internlm-7b", + }, + "InternLM-20B": { + "hf": "internlm/internlm-20b", + "ms": "Shanghai_AI_Laboratory/internlm-20b", + }, + "InternLM-7B-Chat": { + "hf": "internlm/internlm-chat-7b", + "ms": "Shanghai_AI_Laboratory/internlm-chat-7b", + }, + "InternLM-20B-Chat": { + "hf": "internlm/internlm-chat-20b", + "ms": "Shanghai_AI_Laboratory/internlm-chat-20b", + } }, template="intern" ) @@ -180,7 +269,10 @@ register_model_group( register_model_group( models={ - "LingoWhale-8B": "deeplang-ai/LingoWhale-8B" + "LingoWhale-8B": { + "hf": "deeplang-ai/LingoWhale-8B", + "ms": "DeepLang/LingoWhale-8B", + } }, module="qkv_proj" ) @@ -188,22 +280,52 @@ register_model_group( register_model_group( models={ - "LLaMA-7B": "huggyllama/llama-7b", - "LLaMA-13B": "huggyllama/llama-13b", - "LLaMA-30B": "huggyllama/llama-30b", - "LLaMA-65B": "huggyllama/llama-65b" + "LLaMA-7B": { + "hf": "huggyllama/llama-7b", + "ms": "skyline2006/llama-7b", + }, + "LLaMA-13B": { + "hf": "huggyllama/llama-13b", + "ms": "skyline2006/llama-13b", + }, + "LLaMA-30B": { + "hf": "huggyllama/llama-30b", + "ms": "skyline2006/llama-30b", + }, + "LLaMA-65B": { + "hf": "huggyllama/llama-65b", + "ms": "skyline2006/llama-65b", + } } ) register_model_group( models={ - "LLaMA2-7B": "meta-llama/Llama-2-7b-hf", - "LLaMA2-13B": "meta-llama/Llama-2-13b-hf", - "LLaMA2-70B": "meta-llama/Llama-2-70b-hf", - "LLaMA2-7B-Chat": "meta-llama/Llama-2-7b-chat-hf", - "LLaMA2-13B-Chat": "meta-llama/Llama-2-13b-chat-hf", - "LLaMA2-70B-Chat": "meta-llama/Llama-2-70b-chat-hf" + "LLaMA2-7B": { + "hf": "meta-llama/Llama-2-7b-hf", + "ms": "modelscope/Llama-2-7b-ms", + }, + "LLaMA2-13B": { + "hf": "meta-llama/Llama-2-13b-hf", + "ms": "modelscope/Llama-2-13b-ms", + }, + "LLaMA2-70B": { + "hf": "meta-llama/Llama-2-70b-hf", + "ms": "modelscope/Llama-2-70b-ms", + }, + "LLaMA2-7B-Chat": { + "hf": "meta-llama/Llama-2-7b-chat-hf", + "ms": "modelscope/Llama-2-7b-chat-ms", + }, + "LLaMA2-13B-Chat": { + "hf": "meta-llama/Llama-2-13b-chat-hf", + "ms": "modelscope/Llama-2-13b-chat-ms", + }, + "LLaMA2-70B-Chat": { + "hf": "meta-llama/Llama-2-70b-chat-hf", + "ms": "modelscope/Llama-2-70b-chat-ms", + } }, template="llama2" ) @@ -211,8 +333,14 @@ register_model_group( register_model_group( models={ - "Mistral-7B": "mistralai/Mistral-7B-v0.1", - "Mistral-7B-Chat": "mistralai/Mistral-7B-Instruct-v0.1" + "Mistral-7B": { + "hf": "mistralai/Mistral-7B-v0.1", + 
"ms": "AI-ModelScope/Mistral-7B-v0.1", + }, + "Mistral-7B-Chat": { + "hf": "mistralai/Mistral-7B-Instruct-v0.1", + "ms": "AI-ModelScope/Mistral-7B-Instruct-v0.1", + } }, template="mistral" ) @@ -220,7 +348,10 @@ register_model_group( register_model_group( models={ - "OpenChat3.5-7B-Chat": "openchat/openchat_3.5" + "OpenChat3.5-7B-Chat": { + "hf": "openchat/openchat_3.5", + "ms": "myxiongmodel/openchat_3.5", + } }, template="openchat" ) @@ -228,7 +359,10 @@ register_model_group( register_model_group( models={ - "Phi1.5-1.3B": "microsoft/phi-1_5" + "Phi1.5-1.3B": { + "hf": "microsoft/phi-1_5", + "ms": "allspace/PHI_1-5", + } }, module="Wqkv" ) @@ -236,22 +370,38 @@ register_model_group( register_model_group( models={ - "Qwen-1.8B": "Qwen/Qwen-1_8B", - "Qwen-7B": "Qwen/Qwen-7B", - "Qwen-14B": "Qwen/Qwen-14B", - "Qwen-72B": "Qwen/Qwen-72B", - "Qwen-1.8B-Chat": "Qwen/Qwen-1_8B-Chat", - "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat", - "Qwen-14B-Chat": "Qwen/Qwen-14B-Chat", - "Qwen-72B-Chat": "Qwen/Qwen-72B-Chat", - "Qwen-1.8B-int8-Chat": "Qwen/Qwen-1_8B-Chat-Int8", - "Qwen-1.8B-int4-Chat": "Qwen/Qwen-1_8B-Chat-Int4", - "Qwen-7B-int8-Chat": "Qwen/Qwen-7B-Chat-Int8", - "Qwen-7B-int4-Chat": "Qwen/Qwen-7B-Chat-Int4", - "Qwen-14B-int8-Chat": "Qwen/Qwen-14B-Chat-Int8", - "Qwen-14B-int4-Chat": "Qwen/Qwen-14B-Chat-Int4", - "Qwen-72B-int8-Chat": "Qwen/Qwen-72B-Chat-Int8", - "Qwen-72B-int4-Chat": "Qwen/Qwen-72B-Chat-Int4" + "Qwen-7B": { + "hf": "Qwen/Qwen-7B", + "ms": "qwen/Qwen-7B", + }, + "Qwen-14B": { + "hf": "Qwen/Qwen-14B", + "ms": "qwen/Qwen-14B", + }, + "Qwen-7B-Chat": { + "hf": "Qwen/Qwen-7B-Chat", + "ms": "qwen/Qwen-7B-Chat", + }, + "Qwen-14B-Chat": { + "hf": "Qwen/Qwen-14B-Chat", + "ms": "qwen/Qwen-14B-Chat", + }, + "Qwen-7B-int8-Chat": { + "hf": "Qwen/Qwen-7B-Chat-Int8", + "ms": "qwen/Qwen-7B-Chat-Int8", + }, + "Qwen-7B-int4-Chat": { + "hf": "Qwen/Qwen-7B-Chat-Int4", + "ms": "qwen/Qwen-7B-Chat-Int4", + }, + "Qwen-14B-int8-Chat": { + "hf": "Qwen/Qwen-14B-Chat-Int8", + "ms": "qwen/Qwen-14B-Chat-Int8", + }, + "Qwen-14B-int4-Chat": { + "hf": "Qwen/Qwen-14B-Chat-Int4", + "ms": "qwen/Qwen-14B-Chat-Int4", + } }, module="c_attn", template="qwen" @@ -260,15 +410,24 @@ register_model_group( register_model_group( models={ - "Skywork-13B-Base": "Skywork/Skywork-13B-base" + "Skywork-13B-Base": { + "hf": "Skywork/Skywork-13B-base", + "ms": "skywork/Skywork-13B-base", + } } ) register_model_group( models={ - "Vicuna1.5-7B-Chat": "lmsys/vicuna-7b-v1.5", - "Vicuna1.5-13B-Chat": "lmsys/vicuna-13b-v1.5" + "Vicuna1.5-7B-Chat": { + "hf": "lmsys/vicuna-7b-v1.5", + "ms": "AI-ModelScope/vicuna-7b-v1.5", + }, + "Vicuna1.5-13B-Chat": { + "hf": "lmsys/vicuna-13b-v1.5", + "ms": "Xorbits/vicuna-13b-v1.5", + } }, template="vicuna" ) @@ -276,11 +435,26 @@ register_model_group( register_model_group( models={ - "XVERSE-7B": "xverse/XVERSE-7B", - "XVERSE-13B": "xverse/XVERSE-13B", - "XVERSE-65B": "xverse/XVERSE-65B", - "XVERSE-7B-Chat": "xverse/XVERSE-7B-Chat", - "XVERSE-13B-Chat": "xverse/XVERSE-13B-Chat" + "XVERSE-7B": { + "hf": "xverse/XVERSE-7B", + "ms": "xverse/XVERSE-7B", + }, + "XVERSE-13B": { + "hf": "xverse/XVERSE-13B", + "ms": "xverse/XVERSE-13B", + }, + "XVERSE-65B": { + "hf": "xverse/XVERSE-65B", + "ms": "xverse/XVERSE-65B", + }, + "XVERSE-7B-Chat": { + "hf": "xverse/XVERSE-7B-Chat", + "ms": "xverse/XVERSE-7B-Chat", + }, + "XVERSE-13B-Chat": { + "hf": "xverse/XVERSE-13B-Chat", + "ms": "xverse/XVERSE-13B-Chat", + } }, template="xverse" ) @@ -288,8 +462,14 @@ register_model_group( register_model_group( models={ - "Yayi-7B": 
"wenge-research/yayi-7b-llama2", - "Yayi-13B": "wenge-research/yayi-13b-llama2" + "Yayi-7B": { + "hf": "wenge-research/yayi-7b-llama2", + "ms": "AI-ModelScope/yayi-7b-llama2", + }, + "Yayi-13B": { + "hf": "wenge-research/yayi-13b-llama2", + "ms": "AI-ModelScope/yayi-13b-llama2", + } }, template="yayi" ) @@ -297,10 +477,22 @@ register_model_group( register_model_group( models={ - "Yi-6B": "01-ai/Yi-6B", - "Yi-34B": "01-ai/Yi-34B", - "Yi-34B-Chat": "01-ai/Yi-34B-Chat", - "Yi-34B-int8-Chat": "01-ai/Yi-34B-Chat-8bits" + "Yi-6B": { + "hf": "01-ai/Yi-6B", + "ms": "01ai/Yi-6B", + }, + "Yi-34B": { + "hf": "01-ai/Yi-34B", + "ms": "01ai/Yi-34B", + }, + "Yi-34B-Chat": { + "hf": "01-ai/Yi-34B-Chat", + "ms": "01ai/Yi-34B-Chat", + }, + "Yi-34B-int8-Chat": { + "hf": "01-ai/Yi-34B-Chat-8bits", + "ms": "01ai/Yi-34B-Chat-8bits", + } }, template="yi" ) @@ -308,8 +500,14 @@ register_model_group( register_model_group( models={ - "Zephyr-7B-Alpha-Chat": "HuggingFaceH4/zephyr-7b-alpha", - "Zephyr-7B-Beta-Chat": "HuggingFaceH4/zephyr-7b-beta" + "Zephyr-7B-Alpha-Chat": { + "hf": "HuggingFaceH4/zephyr-7b-alpha", + "ms": "AI-ModelScope/zephyr-7b-alpha", + }, + "Zephyr-7B-Beta-Chat": { + "hf": "HuggingFaceH4/zephyr-7b-beta", + "ms": "modelscope/zephyr-7b-beta", + } }, template="zephyr" ) diff --git a/src/llmtuner/hparams/model_args.py b/src/llmtuner/hparams/model_args.py index 4bda39d5..ebf6cafa 100644 --- a/src/llmtuner/hparams/model_args.py +++ b/src/llmtuner/hparams/model_args.py @@ -8,7 +8,8 @@ class ModelArguments: Arguments pertaining to which model/config/tokenizer we are going to fine-tune. """ model_name_or_path: str = field( - metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models."} + metadata={"help": "Path to pretrained model or model identifier " + "from huggingface.co/models or modelscope.cn/models."} ) cache_dir: Optional[str] = field( default=None, diff --git a/src/llmtuner/model/loader.py b/src/llmtuner/model/loader.py index 5561837a..122cd7f2 100644 --- a/src/llmtuner/model/loader.py +++ b/src/llmtuner/model/loader.py @@ -1,4 +1,6 @@ import math +import os + import torch from types import MethodType from typing import TYPE_CHECKING, Literal, Optional, Tuple @@ -63,6 +65,8 @@ def load_model_and_tokenizer( "token": model_args.hf_hub_token } + try_download_model_from_ms(model_args) + tokenizer = AutoTokenizer.from_pretrained( model_args.model_name_or_path, use_fast=model_args.use_fast_tokenizer, @@ -228,3 +232,16 @@ def load_model_and_tokenizer( logger.info("This IS expected that the trainable params is 0 if you are using model for inference only.") return model, tokenizer + + +def try_download_model_from_ms(model_args): + if int(os.environ.get('USE_MODELSCOPE_HUB', '0')) and not os.path.exists(model_args.model_name_or_path): + try: + from modelscope import snapshot_download + revision = model_args.model_revision + if revision == 'main': + revision = 'master' + model_args.model_name_or_path = snapshot_download(model_args.model_name_or_path, revision) + except ImportError as e: + raise ImportError(f'You are using `USE_MODELSCOPE_HUB=1` but you have no modelscope sdk installed. 
diff --git a/src/llmtuner/webui/common.py b/src/llmtuner/webui/common.py
index 40f10c6a..dabfab16 100644
--- a/src/llmtuner/webui/common.py
+++ b/src/llmtuner/webui/common.py
@@ -11,15 +11,18 @@ from transformers.utils import (
     ADAPTER_SAFE_WEIGHTS_NAME
 )
 
+
 from llmtuner.extras.constants import (
     DEFAULT_MODULE,
     DEFAULT_TEMPLATE,
     SUPPORTED_MODELS,
+    ALL_OFFICIAL_MODELS,
     TRAINING_STAGES
 )
 from llmtuner.hparams.data_args import DATA_CONFIG
 
+
 DEFAULT_CACHE_DIR = "cache"
 DEFAULT_DATA_DIR = "data"
 DEFAULT_SAVE_DIR = "saves"
@@ -63,7 +66,10 @@ def save_config(lang: str, model_name: Optional[str] = None, model_path: Optional
 
 def get_model_path(model_name: str) -> str:
     user_config = load_config()
-    return user_config["path_dict"].get(model_name, None) or SUPPORTED_MODELS.get(model_name, "")
+    cached_path = user_config["path_dict"].get(model_name, None)
+    if cached_path in ALL_OFFICIAL_MODELS.get(model_name, []):
+        # Discard cached official IDs so that switching USE_MODELSCOPE_HUB takes effect.
+        cached_path = None
+    return cached_path or SUPPORTED_MODELS.get(model_name, "")
 
 
 def get_prefix(model_name: str) -> str:
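Finally, a condensed illustration of why `get_model_path` now discards cached official IDs: a path cached while one hub was active would otherwise shadow the other hub's ID after switching `USE_MODELSCOPE_HUB` (the dictionaries below are illustrative excerpts, not the real registries):

```python
from typing import Optional

ALL_OFFICIAL_MODELS = {"ChatGLM3-6B-Chat": ["THUDM/chatglm3-6b", "ZhipuAI/chatglm3-6b"]}
SUPPORTED_MODELS = {"ChatGLM3-6B-Chat": "ZhipuAI/chatglm3-6b"}  # with USE_MODELSCOPE_HUB=1

def get_model_path(model_name: str, cached_path: Optional[str]) -> str:
    if cached_path in ALL_OFFICIAL_MODELS.get(model_name, []):
        cached_path = None  # stale hub ID: fall back to the registry
    return cached_path or SUPPORTED_MODELS.get(model_name, "")

print(get_model_path("ChatGLM3-6B-Chat", "THUDM/chatglm3-6b"))  # ZhipuAI/chatglm3-6b
```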