Merge pull request #5922 from Tuyohai/main

support granite3 models

Former-commit-id: c23a4d0658323434c386716c25855711202e37a9
This commit is contained in:
hoshi-hiyouga 2024-12-23 16:46:02 +08:00 committed by GitHub
commit 8265d6a228
4 changed files with 53 additions and 0 deletions

View File

@ -194,6 +194,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
| [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma | | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
| [GLM-4](https://huggingface.co/THUDM) | 9B | glm4 | | [GLM-4](https://huggingface.co/THUDM) | 9B | glm4 |
| [Granite 3.0](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 |
| [Index](https://huggingface.co/IndexTeam) | 1.9B | index | | [Index](https://huggingface.co/IndexTeam) | 1.9B | index |
| [InternLM2/InternLM2.5](https://huggingface.co/internlm) | 7B/20B | intern2 | | [InternLM2/InternLM2.5](https://huggingface.co/internlm) | 7B/20B | intern2 |
| [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - | | [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |

View File

@ -195,6 +195,7 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
| [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma | | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
| [GLM-4](https://huggingface.co/THUDM) | 9B | glm4 | | [GLM-4](https://huggingface.co/THUDM) | 9B | glm4 |
| [Granite 3.0](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 |
| [Index](https://huggingface.co/IndexTeam) | 1.9B | index | | [Index](https://huggingface.co/IndexTeam) | 1.9B | index |
| [InternLM2/InternLM2.5](https://huggingface.co/internlm) | 7B/20B | intern2 | | [InternLM2/InternLM2.5](https://huggingface.co/internlm) | 7B/20B | intern2 |
| [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - | | [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |

View File

@ -696,6 +696,18 @@ _register_template(
) )
# Chat template for the "granite3" model family.
# Each turn is wrapped in <|start_of_role|>ROLE<|end_of_role|> ... <|end_of_text|>.
_register_template(
    name="granite3",
    # The user slot also opens the assistant role header, so generation
    # continues directly after "<|start_of_role|>assistant<|end_of_role|>".
    format_user=StringFormatter(
        slots=[
            "<|start_of_role|>user<|end_of_role|>{{content}}<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>"
        ]
    ),
    format_system=StringFormatter(
        slots=["<|start_of_role|>system<|end_of_role|>{{content}}<|end_of_text|>\n"]
    ),
    # The assistant reply is closed with the same <|end_of_text|> marker that is
    # listed in stop_words below.
    format_assistant=StringFormatter(
        slots=["{{content}}<|end_of_text|>\n"]
    ),
    # NOTE(review): format_assistant already ends with "\n" and the separator is
    # "\n" as well -- confirm this does not produce a blank line between turns.
    format_separator=EmptyFormatter(
        slots=["\n"]
    ),
    stop_words=[
        "<|end_of_text|>",
    ],
    replace_eos=True,
    replace_jinja_template=True,
)
_register_template( _register_template(
name="index", name="index",
format_user=StringFormatter(slots=["reserved_0{{content}}reserved_1"]), format_user=StringFormatter(slots=["reserved_0{{content}}reserved_1"]),

View File

@ -583,6 +583,45 @@ register_model_group(
) )
# Granite 3.0 checkpoints: every model name maps each download source to the
# repository id to fetch from. All entries in this group use the "granite3"
# chat template.
register_model_group(
    models={
        # --- 8B ---
        "Granite-3.0-8B-Instruct": {
            DownloadSource.DEFAULT: "ibm-granite/granite-3.0-8b-instruct",
            DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.0-8b-instruct",
        },
        "Granite-3.0-8B-Base": {
            DownloadSource.DEFAULT: "ibm-granite/granite-3.0-8b-base",
            DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.0-8b-base",
        },
        # --- 2B ---
        "Granite-3.0-2B-Instruct": {
            DownloadSource.DEFAULT: "ibm-granite/granite-3.0-2b-instruct",
            DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.0-2b-instruct",
        },
        "Granite-3.0-2B-Base": {
            DownloadSource.DEFAULT: "ibm-granite/granite-3.0-2b-base",
            DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.0-2b-base",
        },
        # --- 3B-A800M ---
        "Granite-3.0-3B-A800M-Instruct": {
            DownloadSource.DEFAULT: "ibm-granite/granite-3.0-3b-a800m-instruct",
            DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.0-3b-a800m-instruct",
        },
        "Granite-3.0-3B-A800M-Base": {
            DownloadSource.DEFAULT: "ibm-granite/granite-3.0-3b-a800m-base",
            DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.0-3b-a800m-base",
        },
        # --- 1B-A400M ---
        "Granite-3.0-1B-A400M-Instruct": {
            DownloadSource.DEFAULT: "ibm-granite/granite-3.0-1b-a400m-instruct",
            DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.0-1b-a400m-instruct",
        },
        "Granite-3.0-1B-A400M-Base": {
            DownloadSource.DEFAULT: "ibm-granite/granite-3.0-1b-a400m-base",
            DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.0-1b-a400m-base",
        },
    },
    template="granite3",
)
register_model_group( register_model_group(
models={ models={
"Index-1.9B-Chat": { "Index-1.9B-Chat": {