add Gemma2 models

Former-commit-id: 8fc5a248ecfd6861cb90dac6c14fe89cdeaf8921
This commit is contained in:
hiyouga 2024-06-28 01:26:50 +08:00
parent 024760f866
commit 42e7489713
4 changed files with 15 additions and 3 deletions

View File

@@ -160,7 +160,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
| [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | cohere | | [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | cohere |
| [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek | | [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
| [Gemma/CodeGemma](https://huggingface.co/google) | 2B/7B | gemma | | [Gemma/Gemma2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
| [GLM4](https://huggingface.co/THUDM) | 9B | glm4 | | [GLM4](https://huggingface.co/THUDM) | 9B | glm4 |
| [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 | | [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 |
| [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - | | [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |

View File

@@ -160,7 +160,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd
| [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | cohere | | [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | cohere |
| [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek | | [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
| [Gemma/CodeGemma](https://huggingface.co/google) | 2B/7B | gemma | | [Gemma/Gemma2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
| [GLM4](https://huggingface.co/THUDM) | 9B | glm4 | | [GLM4](https://huggingface.co/THUDM) | 9B | glm4 |
| [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 | | [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 |
| [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - | | [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |

View File

@@ -507,6 +507,18 @@ register_model_group(
"Gemma-1.1-7B-Chat": { "Gemma-1.1-7B-Chat": {
DownloadSource.DEFAULT: "google/gemma-1.1-7b-it", DownloadSource.DEFAULT: "google/gemma-1.1-7b-it",
}, },
"Gemma-2-9B": {
DownloadSource.DEFAULT: "google/gemma-2-9b",
},
"Gemma-2-27B": {
DownloadSource.DEFAULT: "google/gemma-2-27b",
},
"Gemma-2-9B-Chat": {
DownloadSource.DEFAULT: "google/gemma-2-9b-it",
},
"Gemma-2-27B-Chat": {
DownloadSource.DEFAULT: "google/gemma-2-27b-it",
},
}, },
template="gemma", template="gemma",
) )

View File

@@ -105,7 +105,7 @@ class PairwiseTrainer(Trainer):
Subclass and override to inject custom behavior. Subclass and override to inject custom behavior.
Note that the first element will be removed from the output tuple. Note that the first element will be removed from the output tuple.
See: https://github.com/huggingface/transformers/blob/v4.39.1/src/transformers/trainer.py#L3777 See: https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/trainer.py#L3842
""" """
# Compute rewards # Compute rewards
_, _, values = model(**inputs, output_hidden_states=True, return_dict=True) _, _, values = model(**inputs, output_hidden_states=True, return_dict=True)