From e08e0e5814bf4245d3a3ed48d9a0d93d230236db Mon Sep 17 00:00:00 2001 From: "yuze.zyz" Date: Wed, 29 Nov 2023 20:36:55 +0800 Subject: [PATCH 1/6] support ms Former-commit-id: d38a2e7341100902b6c761895b1fe6191c905d06 --- src/llmtuner/extras/constants.py | 172 +++++++++++++++++++++++++++++ src/llmtuner/hparams/model_args.py | 3 +- src/llmtuner/model/loader.py | 14 +++ 3 files changed, 188 insertions(+), 1 deletion(-) diff --git a/src/llmtuner/extras/constants.py b/src/llmtuner/extras/constants.py index 3257678e..307f3c97 100644 --- a/src/llmtuner/extras/constants.py +++ b/src/llmtuner/extras/constants.py @@ -1,3 +1,4 @@ +import os from collections import defaultdict, OrderedDict from typing import Dict, Optional @@ -20,6 +21,8 @@ SUBJECTS = ["Average", "STEM", "Social Sciences", "Humanities", "Other"] SUPPORTED_MODELS = OrderedDict() +MODELSCOPE_MODELS = OrderedDict() + TRAINING_STAGES = { "Supervised Fine-Tuning": "sft", "Reward Modeling": "rm", @@ -40,7 +43,11 @@ def register_model_group( prefix = name.split("-")[0] else: assert prefix == name.split("-")[0], "prefix should be identical." 
+ SUPPORTED_MODELS[name] = path + if os.environ.get('USE_MODELSCOPE_HUB', False) and name in MODELSCOPE_MODELS: + # Use ModelScope modelhub + SUPPORTED_MODELS[name] = MODELSCOPE_MODELS[name] if module is not None: DEFAULT_MODULE[prefix] = module if template is not None: @@ -58,6 +65,13 @@ register_model_group( ) +MODELSCOPE_MODELS.update({ + "Baichuan-7B-Base": "baichuan-inc/baichuan-7B", + "Baichuan-13B-Base": "baichuan-inc/Baichuan-13B-Base", + "Baichuan-13B-Chat": "baichuan-inc/Baichuan-13B-Base" +}) + + register_model_group( models={ "Baichuan2-7B-Base": "baichuan-inc/Baichuan2-7B-Base", @@ -70,6 +84,14 @@ register_model_group( ) +MODELSCOPE_MODELS.update({ + "Baichuan2-7B-Base": "baichuan-inc/Baichuan2-7B-Base", + "Baichuan2-13B-Base": "baichuan-inc/Baichuan2-13B-Base", + "Baichuan2-7B-Chat": "baichuan-inc/Baichuan2-7B-Chat", + "Baichuan2-13B-Chat": "baichuan-inc/Baichuan2-13B-Chat" +}) + + register_model_group( models={ "BLOOM-560M": "bigscience/bloom-560m", @@ -80,6 +102,13 @@ register_model_group( ) +MODELSCOPE_MODELS.update({ + "BLOOM-560M": "AI-ModelScope/bloom-560m", + "BLOOM-3B": "bigscience/bloom-3b", + "BLOOM-7B1": "bigscience/bloom-7b1" +}) + + register_model_group( models={ "BLOOMZ-560M": "bigscience/bloomz-560m", @@ -90,6 +119,13 @@ register_model_group( ) +MODELSCOPE_MODELS.update({ + "BLOOMZ-560M": "bigscience/bloomz-560m", + "BLOOMZ-3B": "bigscience/bloomz-3b", + "BLOOMZ-7B1-mt": "AI-ModelScope/bloomz-7b1-mt" +}) + + register_model_group( models={ "BlueLM-7B-Base": "vivo-ai/BlueLM-7B-Base", @@ -99,6 +135,12 @@ register_model_group( ) +MODELSCOPE_MODELS.update({ + "BlueLM-7B-Base": "vivo-ai/BlueLM-7B-Base", + "BlueLM-7B-Chat": "vivo-ai/BlueLM-7B-Chat" +}) + + register_model_group( models={ "ChatGLM2-6B-Chat": "THUDM/chatglm2-6b" @@ -108,6 +150,11 @@ register_model_group( ) +MODELSCOPE_MODELS.update({ + "ChatGLM2-6B-Chat": "ZhipuAI/chatglm2-6b" +}) + + register_model_group( models={ "ChatGLM3-6B-Base": "THUDM/chatglm3-6b-base", @@ -118,6 +165,12 
@@ register_model_group( ) +MODELSCOPE_MODELS.update({ + "ChatGLM3-6B-Base": "ZhipuAI/chatglm3-6b-base", + "ChatGLM3-6B-Chat": "ZhipuAI/chatglm3-6b" +}) + + register_model_group( models={ "ChineseLLaMA2-1.3B": "hfl/chinese-llama-2-1.3b", @@ -131,6 +184,16 @@ register_model_group( ) +MODELSCOPE_MODELS.update({ + "ChineseLLaMA2-1.3B": "hfl/chinese-llama-2-1.3b", + "ChineseLLaMA2-7B": "hfl/chinese-llama-2-7b", + "ChineseLLaMA2-13B": "hfl/chinese-llama-2-13b", + "ChineseLLaMA2-1.3B-Chat": "hfl/chinese-alpaca-2-1.3b", + "ChineseLLaMA2-7B-Chat": "hfl/chinese-alpaca-2-7b", + "ChineseLLaMA2-13B-Chat": "hfl/chinese-alpaca-2-13b" +}) + + register_model_group( models={ "Falcon-7B": "tiiuae/falcon-7b", @@ -145,6 +208,16 @@ register_model_group( ) +MODELSCOPE_MODELS.update({ + "Falcon-7B": "tiiuae/falcon-7b", + "Falcon-40B": "tiiuae/falcon-40b", + "Falcon-180B": "tiiuae/falcon-180B", + "Falcon-7B-Chat": "AI-ModelScope/falcon-7b-instruct", + "Falcon-40B-Chat": "tiiuae/falcon-40b-instruct", + "Falcon-180B-Chat": "tiiuae/falcon-180B-chat" +}) + + register_model_group( models={ "InternLM-7B": "internlm/internlm-7b", @@ -156,6 +229,14 @@ register_model_group( ) +MODELSCOPE_MODELS.update({ + "InternLM-7B": "Shanghai_AI_Laboratory/internlm-7b", + "InternLM-20B": "Shanghai_AI_Laboratory/internlm-20b", + "InternLM-7B-Chat": "Shanghai_AI_Laboratory/internlm-chat-7b", + "InternLM-20B-Chat": "Shanghai_AI_Laboratory/internlm-chat-20b" +}) + + register_model_group( models={ "LingoWhale-8B": "deeplang-ai/LingoWhale-8B" @@ -164,6 +245,11 @@ register_model_group( ) +MODELSCOPE_MODELS.update({ + "LingoWhale-8B": "DeepLang/LingoWhale-8B" +}) + + register_model_group( models={ "LLaMA-7B": "huggyllama/llama-7b", @@ -174,6 +260,14 @@ register_model_group( ) +MODELSCOPE_MODELS.update({ + "LLaMA-7B": "skyline2006/llama-7b", + "LLaMA-13B": "skyline2006/llama-13b", + "LLaMA-30B": "skyline2006/llama-30b", + "LLaMA-65B": "skyline2006/llama-65b" +}) + + register_model_group( models={ "LLaMA2-7B": 
"meta-llama/Llama-2-7b-hf", @@ -187,6 +281,16 @@ register_model_group( ) +MODELSCOPE_MODELS.update({ + "LLaMA2-7B": "modelscope/Llama-2-7b-ms", + "LLaMA2-13B": "modelscope/Llama-2-13b-ms", + "LLaMA2-70B": "modelscope/Llama-2-70b-ms", + "LLaMA2-7B-Chat": "modelscope/Llama-2-7b-chat-ms", + "LLaMA2-13B-Chat": "modelscope/Llama-2-13b-chat-ms", + "LLaMA2-70B-Chat": "modelscope/Llama-2-70b-chat-ms" +}) + + register_model_group( models={ "Mistral-7B": "mistralai/Mistral-7B-v0.1", @@ -196,6 +300,12 @@ register_model_group( ) +MODELSCOPE_MODELS.update({ + "Mistral-7B": "AI-ModelScope/Mistral-7B-v0.1", + "Mistral-7B-Chat": "AI-ModelScope/Mistral-7B-Instruct-v0.1" +}) + + register_model_group( models={ "OpenChat3.5-7B-Chat": "openchat/openchat_3.5" @@ -204,6 +314,11 @@ register_model_group( ) +MODELSCOPE_MODELS.update({ + "OpenChat3.5-7B-Chat": "openchat/openchat_3.5" +}) + + register_model_group( models={ "Phi1.5-1.3B": "microsoft/phi-1_5" @@ -212,6 +327,11 @@ register_model_group( ) +MODELSCOPE_MODELS.update({ + "Phi1.5-1.3B": "microsoft/phi-1_5" +}) + + register_model_group( models={ "Qwen-7B": "Qwen/Qwen-7B", @@ -228,6 +348,18 @@ register_model_group( ) +MODELSCOPE_MODELS.update({ + "Qwen-7B": "qwen/Qwen-7B", + "Qwen-14B": "qwen/Qwen-14B", + "Qwen-7B-Chat": "qwen/Qwen-7B-Chat", + "Qwen-14B-Chat": "qwen/Qwen-14B-Chat", + "Qwen-7B-int8-Chat": "qwen/Qwen-7B-Chat-Int8", + "Qwen-7B-int4-Chat": "qwen/Qwen-7B-Chat-Int4", + "Qwen-14B-int8-Chat": "qwen/Qwen-14B-Chat-Int8", + "Qwen-14B-int4-Chat": "qwen/Qwen-14B-Chat-Int4" +}) + + register_model_group( models={ "Skywork-13B-Base": "Skywork/Skywork-13B-base" @@ -235,6 +367,11 @@ register_model_group( ) +MODELSCOPE_MODELS.update({ + "Skywork-13B-Base": "skywork/Skywork-13B-base" +}) + + register_model_group( models={ "Vicuna1.5-7B-Chat": "lmsys/vicuna-7b-v1.5", @@ -244,6 +381,12 @@ register_model_group( ) +MODELSCOPE_MODELS.update({ + "Vicuna1.5-7B-Chat": "AI-ModelScope/vicuna-7b-v1.5", + "Vicuna1.5-13B-Chat": "lmsys/vicuna-13b-v1.5" 
+}) + + register_model_group( models={ "XVERSE-7B": "xverse/XVERSE-7B", @@ -256,6 +399,15 @@ register_model_group( ) +MODELSCOPE_MODELS.update({ + "XVERSE-7B": "xverse/XVERSE-7B", + "XVERSE-13B": "xverse/XVERSE-13B", + "XVERSE-65B": "xverse/XVERSE-65B", + "XVERSE-7B-Chat": "xverse/XVERSE-7B-Chat", + "XVERSE-13B-Chat": "xverse/XVERSE-13B-Chat" +}) + + register_model_group( models={ "Yayi-7B": "wenge-research/yayi-7b-llama2", @@ -265,6 +417,12 @@ register_model_group( ) +MODELSCOPE_MODELS.update({ + "Yayi-7B": "wenge-research/yayi-7b-llama2", + "Yayi-13B": "wenge-research/yayi-13b-llama2" +}) + + register_model_group( models={ "Yi-6B": "01-ai/Yi-6B", @@ -276,6 +434,14 @@ register_model_group( ) +MODELSCOPE_MODELS.update({ + "Yi-6B": "01ai/Yi-6B", + "Yi-34B": "01ai/Yi-34B", + "Yi-34B-Chat": "01ai/Yi-34B-Chat", + "Yi-34B-int8-Chat": "01ai/Yi-34B-Chat-8bits" +}) + + register_model_group( models={ "Zephyr-7B-Alpha-Chat": "HuggingFaceH4/zephyr-7b-alpha", @@ -283,3 +449,9 @@ register_model_group( }, template="zephyr" ) + + +MODELSCOPE_MODELS.update({ + "Zephyr-7B-Alpha-Chat": "HuggingFaceH4/zephyr-7b-alpha", + "Zephyr-7B-Beta-Chat": "modelscope/zephyr-7b-beta" +}) diff --git a/src/llmtuner/hparams/model_args.py b/src/llmtuner/hparams/model_args.py index 4bda39d5..ebf6cafa 100644 --- a/src/llmtuner/hparams/model_args.py +++ b/src/llmtuner/hparams/model_args.py @@ -8,7 +8,8 @@ class ModelArguments: Arguments pertaining to which model/config/tokenizer we are going to fine-tune. 
""" model_name_or_path: str = field( - metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models."} + metadata={"help": "Path to pretrained model or model identifier " + "from huggingface.co/models or modelscope.cn/models."} ) cache_dir: Optional[str] = field( default=None, diff --git a/src/llmtuner/model/loader.py b/src/llmtuner/model/loader.py index 5561837a..5fed514a 100644 --- a/src/llmtuner/model/loader.py +++ b/src/llmtuner/model/loader.py @@ -1,4 +1,6 @@ import math +import os + import torch from types import MethodType from typing import TYPE_CHECKING, Literal, Optional, Tuple @@ -63,6 +65,8 @@ def load_model_and_tokenizer( "token": model_args.hf_hub_token } + try_download_model_from_ms(model_args) + tokenizer = AutoTokenizer.from_pretrained( model_args.model_name_or_path, use_fast=model_args.use_fast_tokenizer, @@ -228,3 +232,13 @@ def load_model_and_tokenizer( logger.info("This IS expected that the trainable params is 0 if you are using model for inference only.") return model, tokenizer + + +def try_download_model_from_ms(model_args): + if os.environ.get('USE_MODELSCOPE_HUB', False) and not os.path.exists(model_args.model_name_or_path): + try: + from modelscope import snapshot_download + model_args.model_name_or_path = snapshot_download(model_args.model_name_or_path, model_args.model_revision) + except ImportError as e: + raise ImportError(f'You are using `USE_MODELSCOPE_HUB=True` but you have no modelscope sdk installed. 
' + f'Please install it by `pip install modelscope -U`') from e From 45925e4a9ce43d5f100175cd6a38a32bff626f08 Mon Sep 17 00:00:00 2001 From: "yuze.zyz" Date: Wed, 29 Nov 2023 21:43:58 +0800 Subject: [PATCH 2/6] fix Former-commit-id: fb2204c183ae8c061ed6ec7f4f1bfbb0b4900c9b --- src/llmtuner/extras/constants.py | 543 +++++++++++++++++-------------- src/llmtuner/model/loader.py | 5 +- 2 files changed, 303 insertions(+), 245 deletions(-) diff --git a/src/llmtuner/extras/constants.py b/src/llmtuner/extras/constants.py index 307f3c97..9de07bc2 100644 --- a/src/llmtuner/extras/constants.py +++ b/src/llmtuner/extras/constants.py @@ -1,7 +1,6 @@ import os from collections import defaultdict, OrderedDict -from typing import Dict, Optional - +from typing import Dict, Optional, Union CHOICES = ["A", "B", "C", "D"] @@ -33,7 +32,7 @@ TRAINING_STAGES = { def register_model_group( - models: Dict[str, str], + models: Dict[str, Union[str, Dict[str, str]]], module: Optional[str] = None, template: Optional[str] = None ) -> None: @@ -44,10 +43,12 @@ def register_model_group( else: assert prefix == name.split("-")[0], "prefix should be identical." - SUPPORTED_MODELS[name] = path - if os.environ.get('USE_MODELSCOPE_HUB', False) and name in MODELSCOPE_MODELS: + if not os.environ.get('USE_MODELSCOPE_HUB', False): + # If path is a string, we treat it as a huggingface model-id by default. 
+ SUPPORTED_MODELS[name] = path["hf"] if isinstance(path, dict) else path + elif isinstance(path, dict) and "ms" in path: # Use ModelScope modelhub - SUPPORTED_MODELS[name] = MODELSCOPE_MODELS[name] + SUPPORTED_MODELS[name] = path["ms"] if module is not None: DEFAULT_MODULE[prefix] = module if template is not None: @@ -56,402 +57,456 @@ def register_model_group( register_model_group( models={ - "Baichuan-7B-Base": "baichuan-inc/Baichuan-7B", - "Baichuan-13B-Base": "baichuan-inc/Baichuan-13B-Base", - "Baichuan-13B-Chat": "baichuan-inc/Baichuan-13B-Chat" + "Baichuan-7B-Base": { + "hf": "baichuan-inc/Baichuan-7B", + "ms": "baichuan-inc/baichuan-7B", + }, + "Baichuan-13B-Base": { + "hf": "baichuan-inc/Baichuan-13B-Base", + "ms": "baichuan-inc/Baichuan-13B-Base", + }, + "Baichuan-13B-Chat": { + "hf": "baichuan-inc/Baichuan-13B-Chat", + "ms": "baichuan-inc/Baichuan-13B-Base", + } }, module="W_pack", template="baichuan" ) -MODELSCOPE_MODELS.update({ - "Baichuan-7B-Base": "baichuan-inc/baichuan-7B", - "Baichuan-13B-Base": "baichuan-inc/Baichuan-13B-Base", - "Baichuan-13B-Chat": "baichuan-inc/Baichuan-13B-Base" -}) - - register_model_group( models={ - "Baichuan2-7B-Base": "baichuan-inc/Baichuan2-7B-Base", - "Baichuan2-13B-Base": "baichuan-inc/Baichuan2-13B-Base", - "Baichuan2-7B-Chat": "baichuan-inc/Baichuan2-7B-Chat", - "Baichuan2-13B-Chat": "baichuan-inc/Baichuan2-13B-Chat" + "Baichuan2-7B-Base": { + "hf": "baichuan-inc/Baichuan2-7B-Base", + "ms": "baichuan-inc/Baichuan2-7B-Base", + }, + "Baichuan2-13B-Base": { + "hf": "baichuan-inc/Baichuan2-13B-Base", + "ms": "baichuan-inc/Baichuan2-13B-Base", + }, + "Baichuan2-7B-Chat": { + "hf": "baichuan-inc/Baichuan2-7B-Chat", + "ms": "baichuan-inc/Baichuan2-7B-Chat", + }, + "Baichuan2-13B-Chat": { + "hf": "baichuan-inc/Baichuan2-13B-Chat", + "ms": "baichuan-inc/Baichuan2-13B-Chat", + } }, module="W_pack", template="baichuan2" ) -MODELSCOPE_MODELS.update({ - "Baichuan2-7B-Base": "baichuan-inc/Baichuan2-7B-Base", - 
"Baichuan2-13B-Base": "baichuan-inc/Baichuan2-13B-Base", - "Baichuan2-7B-Chat": "baichuan-inc/Baichuan2-7B-Chat", - "Baichuan2-13B-Chat": "baichuan-inc/Baichuan2-13B-Chat" -}) - - register_model_group( models={ - "BLOOM-560M": "bigscience/bloom-560m", - "BLOOM-3B": "bigscience/bloom-3b", - "BLOOM-7B1": "bigscience/bloom-7b1" + "BLOOM-560M": { + "hf": "bigscience/bloom-560m", + "ms": "AI-ModelScope/bloom-560m", + }, + "BLOOM-3B": { + "hf": "bigscience/bloom-3b", + "ms": "bigscience/bloom-3b", + }, + "BLOOM-7B1": { + "hf": "bigscience/bloom-7b1", + "ms": "bigscience/bloom-7b1", + } }, module="query_key_value" ) -MODELSCOPE_MODELS.update({ - "BLOOM-560M": "AI-ModelScope/bloom-560m", - "BLOOM-3B": "bigscience/bloom-3b", - "BLOOM-7B1": "bigscience/bloom-7b1" -}) - - register_model_group( models={ - "BLOOMZ-560M": "bigscience/bloomz-560m", - "BLOOMZ-3B": "bigscience/bloomz-3b", - "BLOOMZ-7B1-mt": "bigscience/bloomz-7b1-mt" + "BLOOMZ-560M": { + "hf": "bigscience/bloomz-560m", + "ms": "bigscience/bloomz-560m", + }, + "BLOOMZ-3B": { + "hf": "bigscience/bloomz-3b", + "ms": "bigscience/bloomz-3b", + }, + "BLOOMZ-7B1-mt": { + "hf": "bigscience/bloomz-7b1-mt", + "ms": "AI-ModelScope/bloomz-7b1-mt", + } }, module="query_key_value" ) -MODELSCOPE_MODELS.update({ - "BLOOMZ-560M": "bigscience/bloomz-560m", - "BLOOMZ-3B": "bigscience/bloomz-3b", - "BLOOMZ-7B1-mt": "AI-ModelScope/bloomz-7b1-mt" -}) - - register_model_group( models={ - "BlueLM-7B-Base": "vivo-ai/BlueLM-7B-Base", - "BlueLM-7B-Chat": "vivo-ai/BlueLM-7B-Chat" + "BlueLM-7B-Base": { + "hf": "vivo-ai/BlueLM-7B-Base", + "ms": "vivo-ai/BlueLM-7B-Base", + }, + "BlueLM-7B-Chat": { + "hf": "vivo-ai/BlueLM-7B-Chat", + "ms": "vivo-ai/BlueLM-7B-Chat", + } }, template="bluelm" ) -MODELSCOPE_MODELS.update({ - "BlueLM-7B-Base": "vivo-ai/BlueLM-7B-Base", - "BlueLM-7B-Chat": "vivo-ai/BlueLM-7B-Chat" -}) - - register_model_group( models={ - "ChatGLM2-6B-Chat": "THUDM/chatglm2-6b" + "ChatGLM2-6B-Chat": { + "hf": "THUDM/chatglm2-6b", + 
"ms": "ZhipuAI/chatglm2-6b", + } }, module="query_key_value", template="chatglm2" ) -MODELSCOPE_MODELS.update({ - "ChatGLM2-6B-Chat": "ZhipuAI/chatglm2-6b" -}) - - register_model_group( models={ - "ChatGLM3-6B-Base": "THUDM/chatglm3-6b-base", - "ChatGLM3-6B-Chat": "THUDM/chatglm3-6b" + "ChatGLM3-6B-Base": { + "hf": "THUDM/chatglm3-6b-base", + "ms": "ZhipuAI/chatglm3-6b-base", + }, + "ChatGLM3-6B-Chat": { + "hf": "THUDM/chatglm3-6b", + "ms": "ZhipuAI/chatglm3-6b", + } }, module="query_key_value", template="chatglm3" ) -MODELSCOPE_MODELS.update({ - "ChatGLM3-6B-Base": "ZhipuAI/chatglm3-6b-base", - "ChatGLM3-6B-Chat": "ZhipuAI/chatglm3-6b" -}) - - register_model_group( models={ - "ChineseLLaMA2-1.3B": "hfl/chinese-llama-2-1.3b", - "ChineseLLaMA2-7B": "hfl/chinese-llama-2-7b", - "ChineseLLaMA2-13B": "hfl/chinese-llama-2-13b", - "ChineseLLaMA2-1.3B-Chat": "hfl/chinese-alpaca-2-1.3b", - "ChineseLLaMA2-7B-Chat": "hfl/chinese-alpaca-2-7b", - "ChineseLLaMA2-13B-Chat": "hfl/chinese-alpaca-2-13b" + "ChineseLLaMA2-1.3B": { + "hf": "hfl/chinese-llama-2-1.3b", + "ms": "hfl/chinese-llama-2-1.3b", + }, + "ChineseLLaMA2-7B": { + "hf": "hfl/chinese-llama-2-7b", + "ms": "hfl/chinese-llama-2-7b", + }, + "ChineseLLaMA2-13B": { + "hf": "hfl/chinese-llama-2-13b", + "ms": "hfl/chinese-llama-2-13b", + }, + "ChineseLLaMA2-1.3B-Chat": { + "hf": "hfl/chinese-alpaca-2-1.3b", + "ms": "hfl/chinese-alpaca-2-1.3b", + }, + "ChineseLLaMA2-7B-Chat": { + "hf": "hfl/chinese-alpaca-2-7b", + "ms": "hfl/chinese-alpaca-2-7b", + }, + "ChineseLLaMA2-13B-Chat": { + "hf": "hfl/chinese-alpaca-2-13b", + "ms": "hfl/chinese-alpaca-2-13b", + } }, template="llama2_zh" ) -MODELSCOPE_MODELS.update({ - "ChineseLLaMA2-1.3B": "hfl/chinese-llama-2-1.3b", - "ChineseLLaMA2-7B": "hfl/chinese-llama-2-7b", - "ChineseLLaMA2-13B": "hfl/chinese-llama-2-13b", - "ChineseLLaMA2-1.3B-Chat": "hfl/chinese-alpaca-2-1.3b", - "ChineseLLaMA2-7B-Chat": "hfl/chinese-alpaca-2-7b", - "ChineseLLaMA2-13B-Chat": "hfl/chinese-alpaca-2-13b" -}) - - 
register_model_group( models={ - "Falcon-7B": "tiiuae/falcon-7b", - "Falcon-40B": "tiiuae/falcon-40b", - "Falcon-180B": "tiiuae/falcon-180B", - "Falcon-7B-Chat": "tiiuae/falcon-7b-instruct", - "Falcon-40B-Chat": "tiiuae/falcon-40b-instruct", - "Falcon-180B-Chat": "tiiuae/falcon-180B-chat" + "Falcon-7B": { + "hf": "tiiuae/falcon-7b", + "ms": "tiiuae/falcon-7b", + }, + "Falcon-40B": { + "hf": "tiiuae/falcon-40b", + "ms": "tiiuae/falcon-40b", + }, + "Falcon-180B": { + "hf": "tiiuae/falcon-180B", + "ms": "tiiuae/falcon-180B", + }, + "Falcon-7B-Chat": { + "hf": "tiiuae/falcon-7b-instruct", + "ms": "AI-ModelScope/falcon-7b-instruct", + }, + "Falcon-40B-Chat": { + "hf": "tiiuae/falcon-40b-instruct", + "ms": "tiiuae/falcon-40b-instruct", + }, + "Falcon-180B-Chat": { + "hf": "tiiuae/falcon-180B-chat", + "ms": "tiiuae/falcon-180B-chat", + } }, module="query_key_value", template="falcon" ) -MODELSCOPE_MODELS.update({ - "Falcon-7B": "tiiuae/falcon-7b", - "Falcon-40B": "tiiuae/falcon-40b", - "Falcon-180B": "tiiuae/falcon-180B", - "Falcon-7B-Chat": "AI-ModelScope/falcon-7b-instruct", - "Falcon-40B-Chat": "tiiuae/falcon-40b-instruct", - "Falcon-180B-Chat": "tiiuae/falcon-180B-chat" -}) - - register_model_group( models={ - "InternLM-7B": "internlm/internlm-7b", - "InternLM-20B": "internlm/internlm-20b", - "InternLM-7B-Chat": "internlm/internlm-chat-7b", - "InternLM-20B-Chat": "internlm/internlm-chat-20b" + "InternLM-7B": { + "hf": "internlm/internlm-7b", + "ms": "Shanghai_AI_Laboratory/internlm-7b", + }, + "InternLM-20B": { + "hf": "internlm/internlm-20b", + "ms": "Shanghai_AI_Laboratory/internlm-20b", + }, + "InternLM-7B-Chat": { + "hf": "internlm/internlm-chat-7b", + "ms": "Shanghai_AI_Laboratory/internlm-chat-7b", + }, + "InternLM-20B-Chat": { + "hf": "internlm/internlm-chat-20b", + "ms": "Shanghai_AI_Laboratory/internlm-chat-20b", + } }, template="intern" ) -MODELSCOPE_MODELS.update({ - "InternLM-7B": "Shanghai_AI_Laboratory/internlm-7b", - "InternLM-20B": 
"Shanghai_AI_Laboratory/internlm-20b", - "InternLM-7B-Chat": "Shanghai_AI_Laboratory/internlm-chat-7b", - "InternLM-20B-Chat": "Shanghai_AI_Laboratory/internlm-chat-20b" -}) - - register_model_group( models={ - "LingoWhale-8B": "deeplang-ai/LingoWhale-8B" + "LingoWhale-8B": { + "hf": "deeplang-ai/LingoWhale-8B", + "ms": "DeepLang/LingoWhale-8B", + } }, module="qkv_proj" ) -MODELSCOPE_MODELS.update({ - "LingoWhale-8B": "DeepLang/LingoWhale-8B" -}) - - register_model_group( models={ - "LLaMA-7B": "huggyllama/llama-7b", - "LLaMA-13B": "huggyllama/llama-13b", - "LLaMA-30B": "huggyllama/llama-30b", - "LLaMA-65B": "huggyllama/llama-65b" + "LLaMA-7B": { + "hf": "huggyllama/llama-7b", + "ms": "skyline2006/llama-7b", + }, + "LLaMA-13B": { + "hf": "huggyllama/llama-13b", + "ms": "skyline2006/llama-13b", + }, + "LLaMA-30B": { + "hf": "huggyllama/llama-30b", + "ms": "skyline2006/llama-30b", + }, + "LLaMA-65B": { + "hf": "huggyllama/llama-65b", + "ms": "skyline2006/llama-65b", + } } ) -MODELSCOPE_MODELS.update({ - "LLaMA-7B": "skyline2006/llama-7b", - "LLaMA-13B": "skyline2006/llama-13b", - "LLaMA-30B": "skyline2006/llama-30b", - "LLaMA-65B": "skyline2006/llama-65b" -}) - - register_model_group( models={ - "LLaMA2-7B": "meta-llama/Llama-2-7b-hf", - "LLaMA2-13B": "meta-llama/Llama-2-13b-hf", - "LLaMA2-70B": "meta-llama/Llama-2-70b-hf", - "LLaMA2-7B-Chat": "meta-llama/Llama-2-7b-chat-hf", - "LLaMA2-13B-Chat": "meta-llama/Llama-2-13b-chat-hf", - "LLaMA2-70B-Chat": "meta-llama/Llama-2-70b-chat-hf" + "LLaMA2-7B": { + "hf": "meta-llama/Llama-2-7b-hf", + "ms": "modelscope/Llama-2-7b-ms", + }, + "LLaMA2-13B": { + "hf": "meta-llama/Llama-2-13b-hf", + "ms": "modelscope/Llama-2-13b-ms", + }, + "LLaMA2-70B": { + "hf": "meta-llama/Llama-2-70b-hf", + "ms": "modelscope/Llama-2-70b-ms", + }, + "LLaMA2-7B-Chat": { + "hf": "meta-llama/Llama-2-7b-chat-hf", + "ms": "modelscope/Llama-2-7b-chat-ms", + }, + "LLaMA2-13B-Chat": { + "hf": "meta-llama/Llama-2-13b-chat-hf", + "ms": 
"modelscope/Llama-2-13b-chat-ms", + }, + "LLaMA2-70B-Chat": { + "hf": "meta-llama/Llama-2-70b-chat-hf", + "ms": "modelscope/Llama-2-70b-chat-ms", + } }, template="llama2" ) -MODELSCOPE_MODELS.update({ - "LLaMA2-7B": "modelscope/Llama-2-7b-ms", - "LLaMA2-13B": "modelscope/Llama-2-13b-ms", - "LLaMA2-70B": "modelscope/Llama-2-70b-ms", - "LLaMA2-7B-Chat": "modelscope/Llama-2-7b-chat-ms", - "LLaMA2-13B-Chat": "modelscope/Llama-2-13b-chat-ms", - "LLaMA2-70B-Chat": "modelscope/Llama-2-70b-chat-ms" -}) - - register_model_group( models={ - "Mistral-7B": "mistralai/Mistral-7B-v0.1", - "Mistral-7B-Chat": "mistralai/Mistral-7B-Instruct-v0.1" + "Mistral-7B": { + "hf": "mistralai/Mistral-7B-v0.1", + "ms": "AI-ModelScope/Mistral-7B-v0.1", + }, + "Mistral-7B-Chat": { + "hf": "mistralai/Mistral-7B-Instruct-v0.1", + "ms": "AI-ModelScope/Mistral-7B-Instruct-v0.1", + } }, template="mistral" ) -MODELSCOPE_MODELS.update({ - "Mistral-7B": "AI-ModelScope/Mistral-7B-v0.1", - "Mistral-7B-Chat": "AI-ModelScope/Mistral-7B-Instruct-v0.1" -}) - - register_model_group( models={ - "OpenChat3.5-7B-Chat": "openchat/openchat_3.5" + "OpenChat3.5-7B-Chat": { + "hf": "openchat/openchat_3.5", + "ms": "openchat/openchat_3.5", + } }, template="openchat" ) -MODELSCOPE_MODELS.update({ - "OpenChat3.5-7B-Chat": "openchat/openchat_3.5" -}) - - register_model_group( models={ - "Phi1.5-1.3B": "microsoft/phi-1_5" + "Phi1.5-1.3B": { + "hf": "microsoft/phi-1_5", + "ms": "microsoft/phi-1_5", + } }, module="Wqkv" ) -MODELSCOPE_MODELS.update({ - "Phi1.5-1.3B": "microsoft/phi-1_5" -}) - - register_model_group( models={ - "Qwen-7B": "Qwen/Qwen-7B", - "Qwen-14B": "Qwen/Qwen-14B", - "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat", - "Qwen-14B-Chat": "Qwen/Qwen-14B-Chat", - "Qwen-7B-int8-Chat": "Qwen/Qwen-7B-Chat-Int8", - "Qwen-7B-int4-Chat": "Qwen/Qwen-7B-Chat-Int4", - "Qwen-14B-int8-Chat": "Qwen/Qwen-14B-Chat-Int8", - "Qwen-14B-int4-Chat": "Qwen/Qwen-14B-Chat-Int4" + "Qwen-7B": { + "hf": "Qwen/Qwen-7B", + "ms": "qwen/Qwen-7B", + }, 
+ "Qwen-14B": { + "hf": "Qwen/Qwen-14B", + "ms": "qwen/Qwen-14B", + }, + "Qwen-7B-Chat": { + "hf": "Qwen/Qwen-7B-Chat", + "ms": "qwen/Qwen-7B-Chat", + }, + "Qwen-14B-Chat": { + "hf": "Qwen/Qwen-14B-Chat", + "ms": "qwen/Qwen-14B-Chat", + }, + "Qwen-7B-int8-Chat": { + "hf": "Qwen/Qwen-7B-Chat-Int8", + "ms": "qwen/Qwen-7B-Chat-Int8", + }, + "Qwen-7B-int4-Chat": { + "hf": "Qwen/Qwen-7B-Chat-Int4", + "ms": "qwen/Qwen-7B-Chat-Int4", + }, + "Qwen-14B-int8-Chat": { + "hf": "Qwen/Qwen-14B-Chat-Int8", + "ms": "qwen/Qwen-14B-Chat-Int8", + }, + "Qwen-14B-int4-Chat": { + "hf": "Qwen/Qwen-14B-Chat-Int4", + "ms": "qwen/Qwen-14B-Chat-Int4", + } }, module="c_attn", template="qwen" ) -MODELSCOPE_MODELS.update({ - "Qwen-7B": "qwen/Qwen-7B", - "Qwen-14B": "qwen/Qwen-14B", - "Qwen-7B-Chat": "qwen/Qwen-7B-Chat", - "Qwen-14B-Chat": "qwen/Qwen-14B-Chat", - "Qwen-7B-int8-Chat": "qwen/Qwen-7B-Chat-Int8", - "Qwen-7B-int4-Chat": "qwen/Qwen-7B-Chat-Int4", - "Qwen-14B-int8-Chat": "qwen/Qwen-14B-Chat-Int8", - "Qwen-14B-int4-Chat": "qwen/Qwen-14B-Chat-Int4" -}) - - register_model_group( models={ - "Skywork-13B-Base": "Skywork/Skywork-13B-base" + "Skywork-13B-Base": { + "hf": "Skywork/Skywork-13B-base", + "ms": "skywork/Skywork-13B-base", + } } ) -MODELSCOPE_MODELS.update({ - "Skywork-13B-Base": "skywork/Skywork-13B-base" -}) - - register_model_group( models={ - "Vicuna1.5-7B-Chat": "lmsys/vicuna-7b-v1.5", - "Vicuna1.5-13B-Chat": "lmsys/vicuna-13b-v1.5" + "Vicuna1.5-7B-Chat": { + "hf": "lmsys/vicuna-7b-v1.5", + "ms": "AI-ModelScope/vicuna-7b-v1.5", + }, + "Vicuna1.5-13B-Chat": { + "hf": "lmsys/vicuna-13b-v1.5", + "ms": "lmsys/vicuna-13b-v1.5", + } }, template="vicuna" ) -MODELSCOPE_MODELS.update({ - "Vicuna1.5-7B-Chat": "AI-ModelScope/vicuna-7b-v1.5", - "Vicuna1.5-13B-Chat": "lmsys/vicuna-13b-v1.5" -}) - - register_model_group( models={ - "XVERSE-7B": "xverse/XVERSE-7B", - "XVERSE-13B": "xverse/XVERSE-13B", - "XVERSE-65B": "xverse/XVERSE-65B", - "XVERSE-7B-Chat": "xverse/XVERSE-7B-Chat", - 
"XVERSE-13B-Chat": "xverse/XVERSE-13B-Chat" + "XVERSE-7B": { + "hf": "xverse/XVERSE-7B", + "ms": "xverse/XVERSE-7B", + }, + "XVERSE-13B": { + "hf": "xverse/XVERSE-13B", + "ms": "xverse/XVERSE-13B", + }, + "XVERSE-65B": { + "hf": "xverse/XVERSE-65B", + "ms": "xverse/XVERSE-65B", + }, + "XVERSE-7B-Chat": { + "hf": "xverse/XVERSE-7B-Chat", + "ms": "xverse/XVERSE-7B-Chat", + }, + "XVERSE-13B-Chat": { + "hf": "xverse/XVERSE-13B-Chat", + "ms": "xverse/XVERSE-13B-Chat", + } }, template="xverse" ) -MODELSCOPE_MODELS.update({ - "XVERSE-7B": "xverse/XVERSE-7B", - "XVERSE-13B": "xverse/XVERSE-13B", - "XVERSE-65B": "xverse/XVERSE-65B", - "XVERSE-7B-Chat": "xverse/XVERSE-7B-Chat", - "XVERSE-13B-Chat": "xverse/XVERSE-13B-Chat" -}) - - register_model_group( models={ - "Yayi-7B": "wenge-research/yayi-7b-llama2", - "Yayi-13B": "wenge-research/yayi-13b-llama2" + "Yayi-7B": { + "hf": "wenge-research/yayi-7b-llama2", + "ms": "wenge-research/yayi-7b-llama2", + }, + "Yayi-13B": { + "hf": "wenge-research/yayi-13b-llama2", + "ms": "wenge-research/yayi-13b-llama2", + } }, template="yayi" ) -MODELSCOPE_MODELS.update({ - "Yayi-7B": "wenge-research/yayi-7b-llama2", - "Yayi-13B": "wenge-research/yayi-13b-llama2" -}) - - register_model_group( models={ - "Yi-6B": "01-ai/Yi-6B", - "Yi-34B": "01-ai/Yi-34B", - "Yi-34B-Chat": "01-ai/Yi-34B-Chat", - "Yi-34B-int8-Chat": "01-ai/Yi-34B-Chat-8bits" + "Yi-6B": { + "hf": "01-ai/Yi-6B", + "ms": "01ai/Yi-6B", + }, + "Yi-34B": { + "hf": "01-ai/Yi-34B", + "ms": "01ai/Yi-34B", + }, + "Yi-34B-Chat": { + "hf": "01-ai/Yi-34B-Chat", + "ms": "01ai/Yi-34B-Chat", + }, + "Yi-34B-int8-Chat": { + "hf": "01-ai/Yi-34B-Chat-8bits", + "ms": "01ai/Yi-34B-Chat-8bits", + } }, template="yi" ) -MODELSCOPE_MODELS.update({ - "Yi-6B": "01ai/Yi-6B", - "Yi-34B": "01ai/Yi-34B", - "Yi-34B-Chat": "01ai/Yi-34B-Chat", - "Yi-34B-int8-Chat": "01ai/Yi-34B-Chat-8bits" -}) - - register_model_group( models={ - "Zephyr-7B-Alpha-Chat": "HuggingFaceH4/zephyr-7b-alpha", - "Zephyr-7B-Beta-Chat": 
"HuggingFaceH4/zephyr-7b-beta" + "Zephyr-7B-Alpha-Chat": { + "hf": "HuggingFaceH4/zephyr-7b-alpha", + "ms": "HuggingFaceH4/zephyr-7b-alpha", + }, + "Zephyr-7B-Beta-Chat": { + "hf": "HuggingFaceH4/zephyr-7b-beta", + "ms": "modelscope/zephyr-7b-beta", + } }, template="zephyr" ) - - -MODELSCOPE_MODELS.update({ - "Zephyr-7B-Alpha-Chat": "HuggingFaceH4/zephyr-7b-alpha", - "Zephyr-7B-Beta-Chat": "modelscope/zephyr-7b-beta" -}) diff --git a/src/llmtuner/model/loader.py b/src/llmtuner/model/loader.py index 5fed514a..85e7ed31 100644 --- a/src/llmtuner/model/loader.py +++ b/src/llmtuner/model/loader.py @@ -238,7 +238,10 @@ def try_download_model_from_ms(model_args): if os.environ.get('USE_MODELSCOPE_HUB', False) and not os.path.exists(model_args.model_name_or_path): try: from modelscope import snapshot_download - model_args.model_name_or_path = snapshot_download(model_args.model_name_or_path, model_args.model_revision) + revision = model_args.model_revision + if revision == 'main': + revision = 'master' + model_args.model_name_or_path = snapshot_download(model_args.model_name_or_path, revision) except ImportError as e: raise ImportError(f'You are using `USE_MODELSCOPE_HUB=True` but you have no modelscope sdk installed. 
' f'Please install it by `pip install modelscope -U`') from e From 63e12226a0d3f215eef5ab4655af1c05a4bafca8 Mon Sep 17 00:00:00 2001 From: tastelikefeet Date: Fri, 1 Dec 2023 15:06:17 +0800 Subject: [PATCH 3/6] add model Former-commit-id: 8ce4d11e38518b0b4657c7e64394d471cbb0bd6d --- src/llmtuner/extras/constants.py | 42 ++++++++++++++++---------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/src/llmtuner/extras/constants.py b/src/llmtuner/extras/constants.py index 9de07bc2..3930a2e3 100644 --- a/src/llmtuner/extras/constants.py +++ b/src/llmtuner/extras/constants.py @@ -107,11 +107,11 @@ register_model_group( }, "BLOOM-3B": { "hf": "bigscience/bloom-3b", - "ms": "bigscience/bloom-3b", + "ms": "AI-ModelScope/bloom-3b", }, "BLOOM-7B1": { "hf": "bigscience/bloom-7b1", - "ms": "bigscience/bloom-7b1", + "ms": "AI-ModelScope/bloom-7b1", } }, module="query_key_value" @@ -122,11 +122,11 @@ register_model_group( models={ "BLOOMZ-560M": { "hf": "bigscience/bloomz-560m", - "ms": "bigscience/bloomz-560m", + "ms": "AI-ModelScope/bloomz-560m", }, "BLOOMZ-3B": { "hf": "bigscience/bloomz-3b", - "ms": "bigscience/bloomz-3b", + "ms": "AI-ModelScope/bloomz-3b", }, "BLOOMZ-7B1-mt": { "hf": "bigscience/bloomz-7b1-mt", @@ -184,27 +184,27 @@ register_model_group( models={ "ChineseLLaMA2-1.3B": { "hf": "hfl/chinese-llama-2-1.3b", - "ms": "hfl/chinese-llama-2-1.3b", + "ms": "AI-ModelScope/chinese-llama-2-1.3b", }, "ChineseLLaMA2-7B": { "hf": "hfl/chinese-llama-2-7b", - "ms": "hfl/chinese-llama-2-7b", + "ms": "AI-ModelScope/chinese-llama-2-7b", }, "ChineseLLaMA2-13B": { "hf": "hfl/chinese-llama-2-13b", - "ms": "hfl/chinese-llama-2-13b", + "ms": "AI-ModelScope/chinese-llama-2-13b", }, "ChineseLLaMA2-1.3B-Chat": { "hf": "hfl/chinese-alpaca-2-1.3b", - "ms": "hfl/chinese-alpaca-2-1.3b", + "ms": "AI-ModelScope/chinese-alpaca-2-1.3b", }, "ChineseLLaMA2-7B-Chat": { "hf": "hfl/chinese-alpaca-2-7b", - "ms": "hfl/chinese-alpaca-2-7b", + "ms": 
"AI-ModelScope/chinese-alpaca-2-7b", }, "ChineseLLaMA2-13B-Chat": { "hf": "hfl/chinese-alpaca-2-13b", - "ms": "hfl/chinese-alpaca-2-13b", + "ms": "AI-ModelScope/chinese-alpaca-2-13b", } }, template="llama2_zh" @@ -215,15 +215,15 @@ register_model_group( models={ "Falcon-7B": { "hf": "tiiuae/falcon-7b", - "ms": "tiiuae/falcon-7b", + "ms": "AI-ModelScope/falcon-7b", }, "Falcon-40B": { "hf": "tiiuae/falcon-40b", - "ms": "tiiuae/falcon-40b", + "ms": "AI-ModelScope/falcon-40b", }, "Falcon-180B": { "hf": "tiiuae/falcon-180B", - "ms": "tiiuae/falcon-180B", + "ms": "AI-ModelScope/falcon-180B", }, "Falcon-7B-Chat": { "hf": "tiiuae/falcon-7b-instruct", @@ -231,11 +231,11 @@ register_model_group( }, "Falcon-40B-Chat": { "hf": "tiiuae/falcon-40b-instruct", - "ms": "tiiuae/falcon-40b-instruct", + "ms": "AI-ModelScope/falcon-40b-instruct", }, "Falcon-180B-Chat": { "hf": "tiiuae/falcon-180B-chat", - "ms": "tiiuae/falcon-180B-chat", + "ms": "AI-ModelScope/falcon-180B-chat", } }, module="query_key_value", @@ -349,7 +349,7 @@ register_model_group( models={ "OpenChat3.5-7B-Chat": { "hf": "openchat/openchat_3.5", - "ms": "openchat/openchat_3.5", + "ms": "myxiongmodel/openchat_3.5", } }, template="openchat" @@ -360,7 +360,7 @@ register_model_group( models={ "Phi1.5-1.3B": { "hf": "microsoft/phi-1_5", - "ms": "microsoft/phi-1_5", + "ms": "allspace/PHI_1-5", } }, module="Wqkv" @@ -425,7 +425,7 @@ register_model_group( }, "Vicuna1.5-13B-Chat": { "hf": "lmsys/vicuna-13b-v1.5", - "ms": "lmsys/vicuna-13b-v1.5", + "ms": "Xorbits/vicuna-13b-v1.5", } }, template="vicuna" @@ -463,11 +463,11 @@ register_model_group( models={ "Yayi-7B": { "hf": "wenge-research/yayi-7b-llama2", - "ms": "wenge-research/yayi-7b-llama2", + "ms": "AI-ModelScope/yayi-7b-llama2", }, "Yayi-13B": { "hf": "wenge-research/yayi-13b-llama2", - "ms": "wenge-research/yayi-13b-llama2", + "ms": "AI-ModelScope/yayi-13b-llama2", } }, template="yayi" @@ -501,7 +501,7 @@ register_model_group( models={ "Zephyr-7B-Alpha-Chat": { "hf": 
"HuggingFaceH4/zephyr-7b-alpha", - "ms": "HuggingFaceH4/zephyr-7b-alpha", + "ms": "AI-ModelScope/zephyr-7b-alpha", }, "Zephyr-7B-Beta-Chat": { "hf": "HuggingFaceH4/zephyr-7b-beta", From b2200409f57d1a67237b5be1cc27c29b1a5b57b7 Mon Sep 17 00:00:00 2001 From: "yuze.zyz" Date: Fri, 1 Dec 2023 16:11:30 +0800 Subject: [PATCH 4/6] add readme Former-commit-id: 5aa6751e52b5c2e06727c50e60218226b146b7bf --- README.md | 29 +++++++++++++++++++++++++++++ README_zh.md | 29 +++++++++++++++++++++++++++++ src/llmtuner/extras/constants.py | 2 +- src/llmtuner/model/loader.py | 4 ++-- 4 files changed, 61 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 7ef3b6f1..0a03d61c 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,8 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ ## Changelog +[23/12/01] We supported **[ModelScope Hub](https://www.modelscope.cn/models)** to accelerate model downloading. Add environment variable `USE_MODELSCOPE_HUB=1` to your command line, then you can use the model-id of ModelScope Hub. + [23/10/21] We supported **[NEFTune](https://arxiv.org/abs/2310.05914)** trick for fine-tuning. Try `--neft_alpha` argument to activate NEFTune, e.g., `--neft_alpha 5`. [23/09/27] We supported **$S^2$-Attn** proposed by [LongLoRA](https://github.com/dvlab-research/LongLoRA) for the LLaMA models. Try `--shift_attn` argument to enable shift short attention. @@ -227,6 +229,33 @@ If you want to enable the quantized LoRA (QLoRA) on the Windows platform, you wi pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.39.1-py3-none-win_amd64.whl ``` +### Use ModelScope Models + +If you have trouble with downloading models from HuggingFace, we have supported ModelScope Hub. To use LLaMA-Factory together with ModelScope, please add a environment variable: + +```shell +export USE_MODELSCOPE_HUB=1 +``` + +> [!NOTE] +> +> Please use integers only. 
Set it to 0 or leave it unset to use the HuggingFace Hub. Any other value will be treated as using the ModelScope Hub. + +Then you can use LLaMA-Factory with ModelScope model-ids: + +```shell +python src/train_bash.py \ + --model_name_or_path ZhipuAI/chatglm3-6b \ + ... other arguments +# You can find all model ids at this link: https://www.modelscope.cn/models +``` + +The web demo also supports ModelScope. After setting the environment variable, run it with this command: + +```shell +CUDA_VISIBLE_DEVICES=0 python src/train_web.py +``` + ### Train on a single GPU > [!IMPORTANT] diff --git a/README_zh.md b/README_zh.md index d6f01d2f..9c92c261 100644 --- a/README_zh.md +++ b/README_zh.md @@ -51,6 +51,8 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/6ba60acc-e2e2-4bec-b846 ## 更新日志 +[23/12/01] 我们支持了 **[魔搭ModelHub](https://www.modelscope.cn/models)** 进行模型下载加速。在启动命令前环境变量中增加 `USE_MODELSCOPE_HUB=1` 即可开启。 + [23/10/21] 我们支持了 **[NEFTune](https://arxiv.org/abs/2310.05914)** 训练技巧。请使用 `--neft_alpha` 参数启用 NEFTune,例如 `--neft_alpha 5`。 [23/09/27] 我们针对 LLaMA 模型支持了 [LongLoRA](https://github.com/dvlab-research/LongLoRA) 提出的 **$S^2$-Attn**。请使用 `--shift_attn` 参数以启用该功能。 @@ -227,6 +229,33 @@ pip install -r requirements.txt pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.39.1-py3-none-win_amd64.whl ``` +### 使用魔搭的模型 + +如果下载HuggingFace模型存在问题,我们已经支持了魔搭的ModelHub,只需要添加一个环境变量: + +```shell +export USE_MODELSCOPE_HUB=1 +``` + +> [!NOTE] +> +> 该环境变量仅支持整数,0或者不设置代表使用HuggingFace,其他值代表使用ModelScope + +之后就可以在命令行中指定魔搭的模型id: + +```shell +python src/train_bash.py \ + --model_name_or_path ZhipuAI/chatglm3-6b \ + ... 
other arguments +# 在这个链接中可以看到所有可用模型: https://www.modelscope.cn/models +``` + +Web demo目前也支持了魔搭, 在设置环境变量后即可使用: + +```shell +CUDA_VISIBLE_DEVICES=0 python src/train_web.py +``` + ### 单 GPU 训练 > [!IMPORTANT] diff --git a/src/llmtuner/extras/constants.py b/src/llmtuner/extras/constants.py index 3930a2e3..c865b4f8 100644 --- a/src/llmtuner/extras/constants.py +++ b/src/llmtuner/extras/constants.py @@ -43,7 +43,7 @@ def register_model_group( else: assert prefix == name.split("-")[0], "prefix should be identical." - if not os.environ.get('USE_MODELSCOPE_HUB', False): + if not int(os.environ.get('USE_MODELSCOPE_HUB', '0')): # If path is a string, we treat it as a huggingface model-id by default. SUPPORTED_MODELS[name] = path["hf"] if isinstance(path, dict) else path elif isinstance(path, dict) and "ms" in path: diff --git a/src/llmtuner/model/loader.py b/src/llmtuner/model/loader.py index 85e7ed31..122cd7f2 100644 --- a/src/llmtuner/model/loader.py +++ b/src/llmtuner/model/loader.py @@ -235,7 +235,7 @@ def load_model_and_tokenizer( def try_download_model_from_ms(model_args): - if os.environ.get('USE_MODELSCOPE_HUB', False) and not os.path.exists(model_args.model_name_or_path): + if int(os.environ.get('USE_MODELSCOPE_HUB', '0')) and not os.path.exists(model_args.model_name_or_path): try: from modelscope import snapshot_download revision = model_args.model_revision @@ -243,5 +243,5 @@ def try_download_model_from_ms(model_args): revision = 'master' model_args.model_name_or_path = snapshot_download(model_args.model_name_or_path, revision) except ImportError as e: - raise ImportError(f'You are using `USE_MODELSCOPE_HUB=True` but you have no modelscope sdk installed. ' + raise ImportError(f'You are using `USE_MODELSCOPE_HUB=1` but you have no modelscope sdk installed. 
' f'Please install it by `pip install modelscope -U`') from e From eb835b693d22c154d986ba5ff31e5f878e2c00e8 Mon Sep 17 00:00:00 2001 From: tastelikefeet Date: Fri, 1 Dec 2023 17:27:00 +0800 Subject: [PATCH 5/6] fix bug Former-commit-id: d9e52957e272e8133f1b37cf20d193084425e09e --- src/llmtuner/extras/constants.py | 4 +++- src/llmtuner/webui/common.py | 7 +++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/llmtuner/extras/constants.py b/src/llmtuner/extras/constants.py index c865b4f8..69f4510d 100644 --- a/src/llmtuner/extras/constants.py +++ b/src/llmtuner/extras/constants.py @@ -20,7 +20,7 @@ SUBJECTS = ["Average", "STEM", "Social Sciences", "Humanities", "Other"] SUPPORTED_MODELS = OrderedDict() -MODELSCOPE_MODELS = OrderedDict() +ALL_OFFICIAL_MODELS = OrderedDict() TRAINING_STAGES = { "Supervised Fine-Tuning": "sft", @@ -43,12 +43,14 @@ def register_model_group( else: assert prefix == name.split("-")[0], "prefix should be identical." + ALL_OFFICIAL_MODELS[name] = [path] if isinstance(path, str) else list(path.values()) if not int(os.environ.get('USE_MODELSCOPE_HUB', '0')): # If path is a string, we treat it as a huggingface model-id by default. 
SUPPORTED_MODELS[name] = path["hf"] if isinstance(path, dict) else path elif isinstance(path, dict) and "ms" in path: # Use ModelScope modelhub SUPPORTED_MODELS[name] = path["ms"] + print(f'Supported models add {name}/{SUPPORTED_MODELS[name]}') if module is not None: DEFAULT_MODULE[prefix] = module if template is not None: diff --git a/src/llmtuner/webui/common.py b/src/llmtuner/webui/common.py index 55d8942b..b21cad62 100644 --- a/src/llmtuner/webui/common.py +++ b/src/llmtuner/webui/common.py @@ -11,7 +11,7 @@ from transformers.utils import ( ADAPTER_SAFE_WEIGHTS_NAME ) -from llmtuner.extras.constants import DEFAULT_MODULE, DEFAULT_TEMPLATE, SUPPORTED_MODELS, TRAINING_STAGES +from llmtuner.extras.constants import DEFAULT_MODULE, DEFAULT_TEMPLATE, SUPPORTED_MODELS, ALL_OFFICIAL_MODELS, TRAINING_STAGES DEFAULT_CACHE_DIR = "cache" @@ -58,7 +58,10 @@ def save_config(lang: str, model_name: Optional[str] = None, model_path: Optiona def get_model_path(model_name: str) -> str: user_config = load_config() - return user_config["path_dict"].get(model_name, None) or SUPPORTED_MODELS.get(model_name, "") + cached_path = user_config["path_dict"].get(model_name, None) + if cached_path in ALL_OFFICIAL_MODELS.get(model_name, []): + cached_path = None + return cached_path or SUPPORTED_MODELS.get(model_name, "") def get_prefix(model_name: str) -> str: From fcd61657ee5a7591f88e0211cdc587b99f90bfb2 Mon Sep 17 00:00:00 2001 From: "yuze.zyz" Date: Fri, 1 Dec 2023 17:28:23 +0800 Subject: [PATCH 6/6] remove useless code Former-commit-id: 5a2392f105704810e9ce96c13fcc8a555726f9b8 --- src/llmtuner/extras/constants.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/llmtuner/extras/constants.py b/src/llmtuner/extras/constants.py index 69f4510d..7e66d1b3 100644 --- a/src/llmtuner/extras/constants.py +++ b/src/llmtuner/extras/constants.py @@ -50,7 +50,6 @@ def register_model_group( elif isinstance(path, dict) and "ms" in path: # Use ModelScope modelhub SUPPORTED_MODELS[name] = path["ms"] - 
print(f'Supported models add {name}/{SUPPORTED_MODELS[name]}') if module is not None: DEFAULT_MODULE[prefix] = module if template is not None: