add Gemma2 models

Former-commit-id: 6f63050e1b61742d5f7e48bdc62c46748031d7cb
This commit is contained in:
hiyouga 2024-06-28 01:26:50 +08:00
parent 2105cf6000
commit d3b7c489f2
4 changed files with 15 additions and 3 deletions

View File

@ -160,7 +160,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
| [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | cohere | | [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | cohere |
| [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek | | [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
| [Gemma/CodeGemma](https://huggingface.co/google) | 2B/7B | gemma | | [Gemma/Gemma2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
| [GLM4](https://huggingface.co/THUDM) | 9B | glm4 | | [GLM4](https://huggingface.co/THUDM) | 9B | glm4 |
| [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 | | [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 |
| [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - | | [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |

View File

@ -160,7 +160,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd
| [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | cohere | | [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | cohere |
| [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek | | [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
| [Gemma/CodeGemma](https://huggingface.co/google) | 2B/7B | gemma | | [Gemma/Gemma2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
| [GLM4](https://huggingface.co/THUDM) | 9B | glm4 | | [GLM4](https://huggingface.co/THUDM) | 9B | glm4 |
| [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 | | [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 |
| [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - | | [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |

View File

@ -507,6 +507,18 @@ register_model_group(
"Gemma-1.1-7B-Chat": { "Gemma-1.1-7B-Chat": {
DownloadSource.DEFAULT: "google/gemma-1.1-7b-it", DownloadSource.DEFAULT: "google/gemma-1.1-7b-it",
}, },
"Gemma-2-9B": {
DownloadSource.DEFAULT: "google/gemma-2-9b",
},
"Gemma-2-27B": {
DownloadSource.DEFAULT: "google/gemma-2-27b",
},
"Gemma-2-9B-Chat": {
DownloadSource.DEFAULT: "google/gemma-2-9b-it",
},
"Gemma-2-27B-Chat": {
DownloadSource.DEFAULT: "google/gemma-2-27b-it",
},
}, },
template="gemma", template="gemma",
) )

View File

@ -105,7 +105,7 @@ class PairwiseTrainer(Trainer):
Subclass and override to inject custom behavior. Subclass and override to inject custom behavior.
Note that the first element will be removed from the output tuple. Note that the first element will be removed from the output tuple.
See: https://github.com/huggingface/transformers/blob/v4.39.1/src/transformers/trainer.py#L3777 See: https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/trainer.py#L3842
""" """
# Compute rewards # Compute rewards
_, _, values = model(**inputs, output_hidden_states=True, return_dict=True) _, _, values = model(**inputs, output_hidden_states=True, return_dict=True)