diff --git a/README.md b/README.md index c8356bb6..ce6a5ee2 100644 --- a/README.md +++ b/README.md @@ -194,7 +194,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma | | [GLM-4](https://huggingface.co/THUDM) | 9B | glm4 | -| [Granite 3.0](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 | +| [Granite 3.0-3.1](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 | | [Index](https://huggingface.co/IndexTeam) | 1.9B | index | | [InternLM2/InternLM2.5](https://huggingface.co/internlm) | 7B/20B | intern2 | | [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - | @@ -759,7 +759,7 @@ If you have a project that should be incorporated, please contact via email or c This repository is licensed under the [Apache-2.0 License](LICENSE). -Please follow the model licenses to use the corresponding model weights: [Baichuan 2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [GLM-4](https://huggingface.co/THUDM/glm-4-9b/blob/main/LICENSE) / [Index](https://huggingface.co/IndexTeam/Index-1.9B/blob/main/LICENSE) / [InternLM2](https://github.com/InternLM/InternLM#license) / [Llama](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [Llama 2 (LLaVA-1.5)](https://ai.meta.com/llama/license/) / [Llama 3](https://llama.meta.com/llama3/license/) / [MiniCPM](https://github.com/OpenBMB/MiniCPM/blob/main/MiniCPM%20Model%20License.md) / [Mistral/Mixtral/Pixtral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/Phi-2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [Skywork](https://huggingface.co/Skywork/Skywork-13B-base/blob/main/Skywork%20Community%20License.pdf) / [StarCoder 2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [TeleChat2](https://huggingface.co/Tele-AI/telechat-7B/blob/main/TeleChat%E6%A8%A1%E5%9E%8B%E7%A4%BE%E5%8C%BA%E8%AE%B8%E5%8F%AF%E5%8D%8F%E8%AE%AE.pdf) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan 2](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan) +Please follow the model licenses to use the corresponding model weights: [Baichuan 2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [GLM-4](https://huggingface.co/THUDM/glm-4-9b/blob/main/LICENSE) / [Granite](LICENSE) / [Index](https://huggingface.co/IndexTeam/Index-1.9B/blob/main/LICENSE) / [InternLM2](https://github.com/InternLM/InternLM#license) / [Llama](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [Llama 2 (LLaVA-1.5)](https://ai.meta.com/llama/license/) / [Llama 3](https://llama.meta.com/llama3/license/) / [MiniCPM](https://github.com/OpenBMB/MiniCPM/blob/main/MiniCPM%20Model%20License.md) / [Mistral/Mixtral/Pixtral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/Phi-2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [Skywork](https://huggingface.co/Skywork/Skywork-13B-base/blob/main/Skywork%20Community%20License.pdf) / [StarCoder 2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [TeleChat2](https://huggingface.co/Tele-AI/telechat-7B/blob/main/TeleChat%E6%A8%A1%E5%9E%8B%E7%A4%BE%E5%8C%BA%E8%AE%B8%E5%8F%AF%E5%8D%8F%E8%AE%AE.pdf) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan 2](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan) ## Citation diff --git a/README_zh.md b/README_zh.md index 94a4fb4f..e8cb1db4 100644 --- a/README_zh.md +++ b/README_zh.md @@ -195,7 +195,7 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272 | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma | | [GLM-4](https://huggingface.co/THUDM) | 9B | glm4 | -| [Granite 3.0](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 | +| [Granite 3.0-3.1](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 | | [Index](https://huggingface.co/IndexTeam) | 1.9B | index | | [InternLM2/InternLM2.5](https://huggingface.co/internlm) | 7B/20B | intern2 | | [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - | @@ -760,7 +760,7 @@ swanlab_run_name: test_run # 可选 本仓库的代码依照 [Apache-2.0](LICENSE) 协议开源。 -使用模型权重时,请遵循对应的模型协议:[Baichuan 2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [GLM-4](https://huggingface.co/THUDM/glm-4-9b/blob/main/LICENSE) / [Index](https://huggingface.co/IndexTeam/Index-1.9B/blob/main/LICENSE) / [InternLM2](https://github.com/InternLM/InternLM#license) / [Llama](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [Llama 2 (LLaVA-1.5)](https://ai.meta.com/llama/license/) / [Llama 3](https://llama.meta.com/llama3/license/) / [MiniCPM](https://github.com/OpenBMB/MiniCPM/blob/main/MiniCPM%20Model%20License.md) / [Mistral/Mixtral/Pixtral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/Phi-2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [Skywork](https://huggingface.co/Skywork/Skywork-13B-base/blob/main/Skywork%20Community%20License.pdf) / [StarCoder 2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [TeleChat2](https://huggingface.co/Tele-AI/telechat-7B/blob/main/TeleChat%E6%A8%A1%E5%9E%8B%E7%A4%BE%E5%8C%BA%E8%AE%B8%E5%8F%AF%E5%8D%8F%E8%AE%AE.pdf) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan 2](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan) +使用模型权重时,请遵循对应的模型协议:[Baichuan 2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [GLM-4](https://huggingface.co/THUDM/glm-4-9b/blob/main/LICENSE) / [Granite](LICENSE) / [Index](https://huggingface.co/IndexTeam/Index-1.9B/blob/main/LICENSE) / [InternLM2](https://github.com/InternLM/InternLM#license) / [Llama](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [Llama 2 (LLaVA-1.5)](https://ai.meta.com/llama/license/) / [Llama 3](https://llama.meta.com/llama3/license/) / [MiniCPM](https://github.com/OpenBMB/MiniCPM/blob/main/MiniCPM%20Model%20License.md) / [Mistral/Mixtral/Pixtral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/Phi-2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [Skywork](https://huggingface.co/Skywork/Skywork-13B-base/blob/main/Skywork%20Community%20License.pdf) / [StarCoder 2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [TeleChat2](https://huggingface.co/Tele-AI/telechat-7B/blob/main/TeleChat%E6%A8%A1%E5%9E%8B%E7%A4%BE%E5%8C%BA%E8%AE%B8%E5%8F%AF%E5%8D%8F%E8%AE%AE.pdf) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan 2](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan) ## 引用 diff --git a/assets/wechat.jpg b/assets/wechat.jpg index d933efed..8baee82f 100644 Binary files a/assets/wechat.jpg and b/assets/wechat.jpg differ diff --git a/assets/wechat_npu.jpg b/assets/wechat_npu.jpg index 60c311a9..e8ccfecb 100644 Binary files a/assets/wechat_npu.jpg and b/assets/wechat_npu.jpg differ diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index 3c596832..4c3b70bf 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -698,13 +698,13 @@ _register_template( _register_template( name="granite3", - format_user=StringFormatter(slots=["<|start_of_role|>user<|end_of_role|>{{content}}<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>"]), + format_user=StringFormatter( + slots=[ + "<|start_of_role|>user<|end_of_role|>{{content}}<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>" + ] + ), format_system=StringFormatter(slots=["<|start_of_role|>system<|end_of_role|>{{content}}<|end_of_text|>\n"]), - format_assistant=StringFormatter(slots=["{{content}}<|end_of_text|>\n"]), format_separator=EmptyFormatter(slots=["\n"]), - stop_words=["<|end_of_text|>"], - replace_eos=True, - replace_jinja_template=True, ) diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index c152f9eb..db360730 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -325,6 +325,7 @@ register_model_group( models={ "Codestral-22B-v0.1-Chat": { DownloadSource.DEFAULT: "mistralai/Codestral-22B-v0.1", + DownloadSource.MODELSCOPE: "swift/Codestral-22B-v0.1", }, }, template="mistral", @@ -420,15 +421,19 @@ register_model_group( }, "DeepSeek-Coder-V2-16B-Base": { DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-Coder-V2-Lite-Base", + DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-Coder-V2-Lite-Base", }, "DeepSeek-Coder-V2-236B-Base": { DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-Coder-V2-Base", + DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-Coder-V2-Base", }, "DeepSeek-Coder-V2-16B-Instruct": { DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct", + DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct", }, "DeepSeek-Coder-V2-236B-Instruct": { DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-Coder-V2-Instruct", + DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-Coder-V2-Instruct", }, }, template="deepseek", @@ -482,6 +487,7 @@ register_model_group( }, "Falcon-11B": { DownloadSource.DEFAULT: "tiiuae/falcon-11B", + DownloadSource.MODELSCOPE: "tiiuae/falcon-11B", }, "Falcon-40B": { DownloadSource.DEFAULT: "tiiuae/falcon-40b", @@ -585,37 +591,69 @@ register_model_group( register_model_group( models={ - "Granite-3.0-8B-Instruct": { - DownloadSource.DEFAULT: "ibm-granite/granite-3.0-8b-instruct", - DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.0-8b-instruct", - }, - "Granite-3.0-8B-Base": { - DownloadSource.DEFAULT: "ibm-granite/granite-3.0-8b-base", - DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.0-8b-base", - }, - "Granite-3.0-2B-Instruct": { - DownloadSource.DEFAULT: "ibm-granite/granite-3.0-2b-instruct", - DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.0-2b-instruct", - }, - "Granite-3.0-2B-Base": { - DownloadSource.DEFAULT: "ibm-granite/granite-3.0-2b-base", - DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.0-2b-base", - }, - "Granite-3.0-3B-A800M-Instruct": { - DownloadSource.DEFAULT: "ibm-granite/granite-3.0-3b-a800m-instruct", - DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.0-3b-a800m-instruct", + "Granite-3.0-1B-A400M-Base": { + DownloadSource.DEFAULT: "ibm-granite/granite-3.0-1b-a400m-base", + DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.0-1b-a400m-base", }, "Granite-3.0-3B-A800M-Base": { DownloadSource.DEFAULT: "ibm-granite/granite-3.0-3b-a800m-base", DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.0-3b-a800m-base", }, + "Granite-3.0-2B-Base": { + DownloadSource.DEFAULT: "ibm-granite/granite-3.0-2b-base", + DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.0-2b-base", + }, + "Granite-3.0-8B-Base": { + DownloadSource.DEFAULT: "ibm-granite/granite-3.0-8b-base", + DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.0-8b-base", + }, "Granite-3.0-1B-A400M-Instruct": { DownloadSource.DEFAULT: "ibm-granite/granite-3.0-1b-a400m-instruct", DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.0-1b-a400m-instruct", }, - "Granite-3.0-1B-A400M-Base": { - DownloadSource.DEFAULT: "ibm-granite/granite-3.0-1b-a400m-base", - DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.0-1b-a400m-base", + "Granite-3.0-3B-A800M-Instruct": { + DownloadSource.DEFAULT: "ibm-granite/granite-3.0-3b-a800m-instruct", + DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.0-3b-a800m-instruct", + }, + "Granite-3.0-2B-Instruct": { + DownloadSource.DEFAULT: "ibm-granite/granite-3.0-2b-instruct", + DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.0-2b-instruct", + }, + "Granite-3.0-8B-Instruct": { + DownloadSource.DEFAULT: "ibm-granite/granite-3.0-8b-instruct", + DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.0-8b-instruct", + }, + "Granite-3.1-1B-A400M-Base": { + DownloadSource.DEFAULT: "ibm-granite/granite-3.1-1b-a400m-base", + DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.1-1b-a400m-base", + }, + "Granite-3.1-3B-A800M-Base": { + DownloadSource.DEFAULT: "ibm-granite/granite-3.1-3b-a800m-base", + DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.1-3b-a800m-base", + }, + "Granite-3.1-2B-Base": { + DownloadSource.DEFAULT: "ibm-granite/granite-3.1-2b-base", + DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.1-2b-base", + }, + "Granite-3.1-8B-Base": { + DownloadSource.DEFAULT: "ibm-granite/granite-3.1-8b-base", + DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.1-8b-base", + }, + "Granite-3.1-1B-A400M-Instruct": { + DownloadSource.DEFAULT: "ibm-granite/granite-3.1-1b-a400m-instruct", + DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.1-1b-a400m-instruct", + }, + "Granite-3.1-3B-A800M-Instruct": { + DownloadSource.DEFAULT: "ibm-granite/granite-3.1-3b-a800m-instruct", + DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.1-3b-a800m-instruct", + }, + "Granite-3.1-2B-Instruct": { + DownloadSource.DEFAULT: "ibm-granite/granite-3.1-2b-instruct", + DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.1-2b-instruct", + }, + "Granite-3.1-8B-Instruct": { + DownloadSource.DEFAULT: "ibm-granite/granite-3.1-8b-instruct", + DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.1-8b-instruct", }, }, template="granite3", @@ -624,14 +662,6 @@ register_model_group( register_model_group( models={ - "Index-1.9B-Chat": { - DownloadSource.DEFAULT: "IndexTeam/Index-1.9B-Chat", - DownloadSource.MODELSCOPE: "IndexTeam/Index-1.9B-Chat", - }, - "Index-1.9B-Character-Chat": { - DownloadSource.DEFAULT: "IndexTeam/Index-1.9B-Character", - DownloadSource.MODELSCOPE: "IndexTeam/Index-1.9B-Character", - }, "Index-1.9B-Base": { DownloadSource.DEFAULT: "IndexTeam/Index-1.9B", DownloadSource.MODELSCOPE: "IndexTeam/Index-1.9B", @@ -640,6 +670,14 @@ register_model_group( DownloadSource.DEFAULT: "IndexTeam/Index-1.9B-Pure", DownloadSource.MODELSCOPE: "IndexTeam/Index-1.9B-Pure", }, + "Index-1.9B-Chat": { + DownloadSource.DEFAULT: "IndexTeam/Index-1.9B-Chat", + DownloadSource.MODELSCOPE: "IndexTeam/Index-1.9B-Chat", + }, + "Index-1.9B-Character-Chat": { + DownloadSource.DEFAULT: "IndexTeam/Index-1.9B-Character", + DownloadSource.MODELSCOPE: "IndexTeam/Index-1.9B-Character", + }, "Index-1.9B-Chat-32K": { DownloadSource.DEFAULT: "IndexTeam/Index-1.9B-32K", DownloadSource.MODELSCOPE: "IndexTeam/Index-1.9B-32K", @@ -1256,15 +1294,19 @@ register_model_group( }, "PaliGemma2-3B-pt-448": { DownloadSource.DEFAULT: "google/paligemma2-3b-pt-448", + DownloadSource.MODELSCOPE: "AI-ModelScope/paligemma2-3b-pt-448", }, "PaliGemma2-3B-pt-896": { DownloadSource.DEFAULT: "google/paligemma2-3b-pt-896", + DownloadSource.MODELSCOPE: "AI-ModelScope/paligemma2-3b-pt-896", }, "PaliGemma2-10B-pt-224": { DownloadSource.DEFAULT: "google/paligemma2-10b-pt-224", + DownloadSource.MODELSCOPE: "AI-ModelScope/paligemma2-10b-pt-224", }, "PaliGemma2-10B-pt-448": { DownloadSource.DEFAULT: "google/paligemma2-10b-pt-448", + DownloadSource.MODELSCOPE: "AI-ModelScope/paligemma2-10b-pt-448", }, "PaliGemma2-10B-pt-896": { DownloadSource.DEFAULT: "google/paligemma2-10b-pt-896", @@ -1272,12 +1314,15 @@ register_model_group( }, "PaliGemma2-28B-pt-224": { DownloadSource.DEFAULT: "google/paligemma2-28b-pt-224", + DownloadSource.MODELSCOPE: "AI-ModelScope/paligemma2-28b-pt-224", }, "PaliGemma2-28B-pt-448": { DownloadSource.DEFAULT: "google/paligemma2-28b-pt-448", + DownloadSource.MODELSCOPE: "AI-ModelScope/paligemma2-28b-pt-448", }, "PaliGemma2-28B-pt-896": { DownloadSource.DEFAULT: "google/paligemma2-28b-pt-896", + DownloadSource.MODELSCOPE: "AI-ModelScope/paligemma2-28b-pt-896", }, }, template="paligemma", @@ -2006,6 +2051,7 @@ register_model_group( models={ "Skywork-o1-Open-Llama-3.1-8B": { DownloadSource.DEFAULT: "Skywork/Skywork-o1-Open-Llama-3.1-8B", + DownloadSource.MODELSCOPE: "AI-ModelScope/Skywork-o1-Open-Llama-3.1-8B", } }, template="skywork_o1",