Merge pull request #6379 from hiyouga/hiyouga/add_paligemma2

[model] add paligemma2

Former-commit-id: 933647e6806428a608c2f0fd90b8ea1ea84cdd89
This commit is contained in:
hoshi-hiyouga 2024-12-18 17:03:11 +08:00 committed by GitHub
commit 910884065e
5 changed files with 69 additions and 30 deletions

View File

@ -32,10 +32,6 @@ Choose your path:
- **PAI-DSW**: [Llama3 Example](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory) | [Qwen2-VL Example](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory_qwen2vl) - **PAI-DSW**: [Llama3 Example](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory) | [Qwen2-VL Example](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory_qwen2vl)
- **Amazon SageMaker**: [Blog](https://aws.amazon.com/cn/blogs/china/a-one-stop-code-free-model-fine-tuning-deployment-platform-based-on-sagemaker-and-llama-factory/) - **Amazon SageMaker**: [Blog](https://aws.amazon.com/cn/blogs/china/a-one-stop-code-free-model-fine-tuning-deployment-platform-based-on-sagemaker-and-llama-factory/)
Recent activities:
- **2024/10/18-2024/11/30**: Build a personal tour guide bot using PAI+LLaMA Factory. [[website]](https://developer.aliyun.com/topic/llamafactory2)
> [!NOTE] > [!NOTE]
> Except for the above links, all other websites are unauthorized third-party websites. Please carefully use them. > Except for the above links, all other websites are unauthorized third-party websites. Please carefully use them.
@ -206,7 +202,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
| [MiniCPM](https://huggingface.co/openbmb) | 1B/2B/4B | cpm/cpm3 | | [MiniCPM](https://huggingface.co/openbmb) | 1B/2B/4B | cpm/cpm3 |
| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral | | [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
| [OLMo](https://huggingface.co/allenai) | 1B/7B | - | | [OLMo](https://huggingface.co/allenai) | 1B/7B | - |
| [PaliGemma](https://huggingface.co/google) | 3B | paligemma | | [PaliGemma/PaliGemma2](https://huggingface.co/google) | 3B/10B/28B | paligemma |
| [Phi-1.5/Phi-2](https://huggingface.co/microsoft) | 1.3B/2.7B | - | | [Phi-1.5/Phi-2](https://huggingface.co/microsoft) | 1.3B/2.7B | - |
| [Phi-3](https://huggingface.co/microsoft) | 4B/14B | phi | | [Phi-3](https://huggingface.co/microsoft) | 4B/14B | phi |
| [Phi-3-small](https://huggingface.co/microsoft) | 7B | phi_small | | [Phi-3-small](https://huggingface.co/microsoft) | 7B | phi_small |
@ -215,7 +211,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
| [Qwen2-VL](https://huggingface.co/Qwen) | 2B/7B/72B | qwen2_vl | | [Qwen2-VL](https://huggingface.co/Qwen) | 2B/7B/72B | qwen2_vl |
| [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 | | [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 |
| [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - | | [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
| [TeleChat2](https://huggingface.co/Tele-AI) | 3B/7B/12B/35B/115B | telechat2 | | [TeleChat2](https://huggingface.co/Tele-AI) | 3B/7B/35B/115B | telechat2 |
| [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse | | [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse |
| [Yi/Yi-1.5 (Code)](https://huggingface.co/01-ai) | 1.5B/6B/9B/34B | yi | | [Yi/Yi-1.5 (Code)](https://huggingface.co/01-ai) | 1.5B/6B/9B/34B | yi |
| [Yi-VL](https://huggingface.co/01-ai) | 6B/34B | yi_vl | | [Yi-VL](https://huggingface.co/01-ai) | 6B/34B | yi_vl |

View File

@ -33,10 +33,6 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
- **PAI-DSW**:[Llama3 案例](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory) | [Qwen2-VL 案例](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory_qwen2vl) - **PAI-DSW**:[Llama3 案例](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory) | [Qwen2-VL 案例](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory_qwen2vl)
- **Amazon SageMaker**:[博客](https://aws.amazon.com/cn/blogs/china/a-one-stop-code-free-model-fine-tuning-deployment-platform-based-on-sagemaker-and-llama-factory/) - **Amazon SageMaker**:[博客](https://aws.amazon.com/cn/blogs/china/a-one-stop-code-free-model-fine-tuning-deployment-platform-based-on-sagemaker-and-llama-factory/)
近期活动:
- **2024/10/18-2024/11/30**:使用 PAI+LLaMA Factory 构建个性化导游机器人。[[活动页面]](https://developer.aliyun.com/topic/llamafactory2)
> [!NOTE] > [!NOTE]
> 除上述链接以外的其他网站均为未经许可的第三方网站,请小心甄别。 > 除上述链接以外的其他网站均为未经许可的第三方网站,请小心甄别。
@ -207,15 +203,16 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
| [MiniCPM](https://huggingface.co/openbmb) | 1B/2B/4B | cpm/cpm3 | | [MiniCPM](https://huggingface.co/openbmb) | 1B/2B/4B | cpm/cpm3 |
| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral | | [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
| [OLMo](https://huggingface.co/allenai) | 1B/7B | - | | [OLMo](https://huggingface.co/allenai) | 1B/7B | - |
| [PaliGemma](https://huggingface.co/google) | 3B | paligemma | | [PaliGemma/PaliGemma2](https://huggingface.co/google) | 3B/10B/28B | paligemma |
| [Phi-1.5/Phi-2](https://huggingface.co/microsoft) | 1.3B/2.7B | - | | [Phi-1.5/Phi-2](https://huggingface.co/microsoft) | 1.3B/2.7B | - |
| [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | phi | | [Phi-3](https://huggingface.co/microsoft) | 4B/14B | phi |
| [Phi-3-small](https://huggingface.co/microsoft) | 7B | phi_small |
| [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral | | [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral |
| [Qwen/QwQ (1-2.5) (Code/Math/MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen | | [Qwen/QwQ (1-2.5) (Code/Math/MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
| [Qwen2-VL](https://huggingface.co/Qwen) | 2B/7B/72B | qwen2_vl | | [Qwen2-VL](https://huggingface.co/Qwen) | 2B/7B/72B | qwen2_vl |
| [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 | | [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 |
| [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - | | [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
| [TeleChat2](https://huggingface.co/Tele-AI) | 3B/7B/12B/35B/115B | telechat2 | | [TeleChat2](https://huggingface.co/Tele-AI) | 3B/7B/35B/115B | telechat2 |
| [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse | | [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse |
| [Yi/Yi-1.5 (Code)](https://huggingface.co/01-ai) | 1.5B/6B/9B/34B | yi | | [Yi/Yi-1.5 (Code)](https://huggingface.co/01-ai) | 1.5B/6B/9B/34B | yi |
| [Yi-VL](https://huggingface.co/01-ai) | 6B/34B | yi_vl | | [Yi-VL](https://huggingface.co/01-ai) | 6B/34B | yi_vl |

View File

@ -1159,7 +1159,6 @@ _register_template(
default_system=( default_system=(
"你是中国电信星辰语义大模型英文名是TeleChat你是由中电信人工智能科技有限公司和中国电信人工智能研究院TeleAI研发的人工智能助手。" "你是中国电信星辰语义大模型英文名是TeleChat你是由中电信人工智能科技有限公司和中国电信人工智能研究院TeleAI研发的人工智能助手。"
), ),
replace_jinja_template=False,
) )

View File

@ -105,7 +105,7 @@ def register_model_group(
) -> None: ) -> None:
for name, path in models.items(): for name, path in models.items():
SUPPORTED_MODELS[name] = path SUPPORTED_MODELS[name] = path
if template is not None and any(suffix in name for suffix in ("-Chat", "-Instruct")): if template is not None and (any(suffix in name for suffix in ("-Chat", "-Instruct")) or vision):
DEFAULT_TEMPLATE[name] = template DEFAULT_TEMPLATE[name] = template
if vision: if vision:
VISION_MODELS.add(name) VISION_MODELS.add(name)
@ -848,10 +848,18 @@ register_model_group(
register_model_group( register_model_group(
models={ models={
"Llama-3.2-11B-Vision": {
DownloadSource.DEFAULT: "meta-llama/Llama-3.2-11B-Vision",
DownloadSource.MODELSCOPE: "LLM-Research/Llama-3.2-11B-Vision",
},
"Llama-3.2-11B-Vision-Instruct": { "Llama-3.2-11B-Vision-Instruct": {
DownloadSource.DEFAULT: "meta-llama/Llama-3.2-11B-Vision-Instruct", DownloadSource.DEFAULT: "meta-llama/Llama-3.2-11B-Vision-Instruct",
DownloadSource.MODELSCOPE: "LLM-Research/Llama-3.2-11B-Vision-Instruct", DownloadSource.MODELSCOPE: "LLM-Research/Llama-3.2-11B-Vision-Instruct",
}, },
"Llama-3.2-90B-Vision": {
DownloadSource.DEFAULT: "meta-llama/Llama-3.2-90B-Vision",
DownloadSource.MODELSCOPE: "LLM-Research/Llama-3.2-90B-Vision",
},
"Llama-3.2-90B-Vision-Instruct": { "Llama-3.2-90B-Vision-Instruct": {
DownloadSource.DEFAULT: "meta-llama/Llama-3.2-90B-Vision-Instruct", DownloadSource.DEFAULT: "meta-llama/Llama-3.2-90B-Vision-Instruct",
DownloadSource.MODELSCOPE: "LLM-Research/Llama-3.2-90B-Vision-Instruct", DownloadSource.MODELSCOPE: "LLM-Research/Llama-3.2-90B-Vision-Instruct",
@ -1175,23 +1183,23 @@ register_model_group(
register_model_group( register_model_group(
models={ models={
"PaliGemma-3B-pt-224-Chat": { "PaliGemma-3B-pt-224": {
DownloadSource.DEFAULT: "google/paligemma-3b-pt-224", DownloadSource.DEFAULT: "google/paligemma-3b-pt-224",
DownloadSource.MODELSCOPE: "AI-ModelScope/paligemma-3b-pt-224", DownloadSource.MODELSCOPE: "AI-ModelScope/paligemma-3b-pt-224",
}, },
"PaliGemma-3B-pt-448-Chat": { "PaliGemma-3B-pt-448": {
DownloadSource.DEFAULT: "google/paligemma-3b-pt-448", DownloadSource.DEFAULT: "google/paligemma-3b-pt-448",
DownloadSource.MODELSCOPE: "AI-ModelScope/paligemma-3b-pt-448", DownloadSource.MODELSCOPE: "AI-ModelScope/paligemma-3b-pt-448",
}, },
"PaliGemma-3B-pt-896-Chat": { "PaliGemma-3B-pt-896": {
DownloadSource.DEFAULT: "google/paligemma-3b-pt-896", DownloadSource.DEFAULT: "google/paligemma-3b-pt-896",
DownloadSource.MODELSCOPE: "AI-ModelScope/paligemma-3b-pt-896", DownloadSource.MODELSCOPE: "AI-ModelScope/paligemma-3b-pt-896",
}, },
"PaliGemma-3B-mix-224-Chat": { "PaliGemma-3B-mix-224": {
DownloadSource.DEFAULT: "google/paligemma-3b-mix-224", DownloadSource.DEFAULT: "google/paligemma-3b-mix-224",
DownloadSource.MODELSCOPE: "AI-ModelScope/paligemma-3b-mix-224", DownloadSource.MODELSCOPE: "AI-ModelScope/paligemma-3b-mix-224",
}, },
"PaliGemma-3B-mix-448-Chat": { "PaliGemma-3B-mix-448": {
DownloadSource.DEFAULT: "google/paligemma-3b-mix-448", DownloadSource.DEFAULT: "google/paligemma-3b-mix-448",
DownloadSource.MODELSCOPE: "AI-ModelScope/paligemma-3b-mix-448", DownloadSource.MODELSCOPE: "AI-ModelScope/paligemma-3b-mix-448",
}, },
@ -1201,6 +1209,43 @@ register_model_group(
) )
register_model_group(
models={
"PaliGemma2-3B-pt-224": {
DownloadSource.DEFAULT: "google/paligemma2-3b-pt-224",
DownloadSource.MODELSCOPE: "AI-ModelScope/paligemma2-3b-pt-224",
},
"PaliGemma2-3B-pt-448": {
DownloadSource.DEFAULT: "google/paligemma2-3b-pt-448",
},
"PaliGemma2-3B-pt-896": {
DownloadSource.DEFAULT: "google/paligemma2-3b-pt-896",
},
"PaliGemma2-10B-pt-224": {
DownloadSource.DEFAULT: "google/paligemma2-10b-pt-224",
},
"PaliGemma2-10B-pt-448": {
DownloadSource.DEFAULT: "google/paligemma2-10b-pt-448",
},
"PaliGemma2-10B-pt-896": {
DownloadSource.DEFAULT: "google/paligemma2-10b-pt-896",
DownloadSource.MODELSCOPE: "AI-ModelScope/paligemma2-10b-pt-896",
},
"PaliGemma2-28B-pt-224": {
DownloadSource.DEFAULT: "google/paligemma2-28b-pt-224",
},
"PaliGemma2-28B-pt-448": {
DownloadSource.DEFAULT: "google/paligemma2-28b-pt-448",
},
"PaliGemma2-28B-pt-896": {
DownloadSource.DEFAULT: "google/paligemma2-28b-pt-896",
},
},
template="paligemma",
vision=True,
)
register_model_group( register_model_group(
models={ models={
"Phi-1.5-1.3B": { "Phi-1.5-1.3B": {
@ -1255,7 +1300,7 @@ register_model_group(
register_model_group( register_model_group(
models={ models={
"Pixtral-12B-Chat": { "Pixtral-12B-Instruct": {
DownloadSource.DEFAULT: "mistral-community/pixtral-12b", DownloadSource.DEFAULT: "mistral-community/pixtral-12b",
DownloadSource.MODELSCOPE: "AI-ModelScope/pixtral-12b", DownloadSource.MODELSCOPE: "AI-ModelScope/pixtral-12b",
} }
@ -1958,10 +2003,13 @@ register_model_group(
DownloadSource.OPENMIND: "TeleAI/TeleChat-7B-pt", DownloadSource.OPENMIND: "TeleAI/TeleChat-7B-pt",
}, },
"TeleChat-12B-Chat": { "TeleChat-12B-Chat": {
DownloadSource.DEFAULT: "Tele-AI/TeleChat-12B", DownloadSource.DEFAULT: "Tele-AI/TeleChat-12B-v2",
DownloadSource.MODELSCOPE: "TeleAI/TeleChat-12B", DownloadSource.MODELSCOPE: "TeleAI/TeleChat-12B-v2",
DownloadSource.OPENMIND: "TeleAI/TeleChat-12B-pt", DownloadSource.OPENMIND: "TeleAI/TeleChat-12B-pt",
} },
"TeleChat-52B-Chat": {
DownloadSource.DEFAULT: "Tele-AI/TeleChat-52B",
},
}, },
template="telechat", template="telechat",
) )
@ -1977,13 +2025,8 @@ register_model_group(
DownloadSource.DEFAULT: "Tele-AI/TeleChat2-7B", DownloadSource.DEFAULT: "Tele-AI/TeleChat2-7B",
DownloadSource.MODELSCOPE: "TeleAI/TeleChat2-7B", DownloadSource.MODELSCOPE: "TeleAI/TeleChat2-7B",
}, },
"TeleChat2-12B-Chat": {
DownloadSource.DEFAULT: "Tele-AI/TeleChat-12B-v2",
DownloadSource.MODELSCOPE: "TeleAI/TeleChat-12B-v2",
},
"TeleChat2-35B-Chat": { "TeleChat2-35B-Chat": {
DownloadSource.DEFAULT: "Tele-AI/TeleChat2-35B", DownloadSource.MODELSCOPE: "TeleAI/TeleChat2-35B-Nov",
DownloadSource.MODELSCOPE: "TeleAI/TeleChat2-35B",
}, },
"TeleChat2-115B-Chat": { "TeleChat2-115B-Chat": {
DownloadSource.DEFAULT: "Tele-AI/TeleChat2-115B", DownloadSource.DEFAULT: "Tele-AI/TeleChat2-115B",

View File

@ -30,15 +30,19 @@ LOCALES = {
"model_name": { "model_name": {
"en": { "en": {
"label": "Model name", "label": "Model name",
"info": "Input the name prefix to search for the model.",
}, },
"ru": { "ru": {
"label": "Название модели", "label": "Название модели",
"info": "Введите префикс имени для поиска модели.",
}, },
"zh": { "zh": {
"label": "模型名称", "label": "模型名称",
"info": "输入首单词以检索模型。",
}, },
"ko": { "ko": {
"label": "모델 이름", "label": "모델 이름",
"info": "모델을 검색하기 위해 이름 접두어를 입력하세요.",
}, },
}, },
"model_path": { "model_path": {