mirror of
				https://github.com/hiyouga/LLaMA-Factory.git
				synced 2025-11-04 18:02:19 +08:00 
			
		
		
		
	add Gemma2 models
Former-commit-id: 8fc5a248ecfd6861cb90dac6c14fe89cdeaf8921
This commit is contained in:
		
							parent
							
								
									024760f866
								
							
						
					
					
						commit
						42e7489713
					
				@ -160,7 +160,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 | 
			
		||||
| [Command-R](https://huggingface.co/CohereForAI)           | 35B/104B                         | cohere    |
 | 
			
		||||
| [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B                  | deepseek  |
 | 
			
		||||
| [Falcon](https://huggingface.co/tiiuae)                   | 7B/11B/40B/180B                  | falcon    |
 | 
			
		||||
| [Gemma/CodeGemma](https://huggingface.co/google)          | 2B/7B                            | gemma     |
 | 
			
		||||
| [Gemma/Gemma2/CodeGemma](https://huggingface.co/google)   | 2B/7B/9B/27B                     | gemma     |
 | 
			
		||||
| [GLM4](https://huggingface.co/THUDM)                      | 9B                               | glm4      |
 | 
			
		||||
| [InternLM2](https://huggingface.co/internlm)              | 7B/20B                           | intern2   |
 | 
			
		||||
| [LLaMA](https://github.com/facebookresearch/llama)        | 7B/13B/33B/65B                   | -         |
 | 
			
		||||
 | 
			
		||||
@ -160,7 +160,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd
 | 
			
		||||
| [Command-R](https://huggingface.co/CohereForAI)           | 35B/104B                         | cohere    |
 | 
			
		||||
| [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B                  | deepseek  |
 | 
			
		||||
| [Falcon](https://huggingface.co/tiiuae)                   | 7B/11B/40B/180B                  | falcon    |
 | 
			
		||||
| [Gemma/CodeGemma](https://huggingface.co/google)          | 2B/7B                            | gemma     |
 | 
			
		||||
| [Gemma/Gemma2/CodeGemma](https://huggingface.co/google)   | 2B/7B/9B/27B                     | gemma     |
 | 
			
		||||
| [GLM4](https://huggingface.co/THUDM)                      | 9B                               | glm4      |
 | 
			
		||||
| [InternLM2](https://huggingface.co/internlm)              | 7B/20B                           | intern2   |
 | 
			
		||||
| [LLaMA](https://github.com/facebookresearch/llama)        | 7B/13B/33B/65B                   | -         |
 | 
			
		||||
 | 
			
		||||
@ -507,6 +507,18 @@ register_model_group(
 | 
			
		||||
        "Gemma-1.1-7B-Chat": {
 | 
			
		||||
            DownloadSource.DEFAULT: "google/gemma-1.1-7b-it",
 | 
			
		||||
        },
 | 
			
		||||
        "Gemma-2-9B": {
 | 
			
		||||
            DownloadSource.DEFAULT: "google/gemma-2-9b",
 | 
			
		||||
        },
 | 
			
		||||
        "Gemma-2-27B": {
 | 
			
		||||
            DownloadSource.DEFAULT: "google/gemma-2-27b",
 | 
			
		||||
        },
 | 
			
		||||
        "Gemma-2-9B-Chat": {
 | 
			
		||||
            DownloadSource.DEFAULT: "google/gemma-2-9b-it",
 | 
			
		||||
        },
 | 
			
		||||
        "Gemma-2-27B-Chat": {
 | 
			
		||||
            DownloadSource.DEFAULT: "google/gemma-2-27b-it",
 | 
			
		||||
        },
 | 
			
		||||
    },
 | 
			
		||||
    template="gemma",
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
@ -105,7 +105,7 @@ class PairwiseTrainer(Trainer):
 | 
			
		||||
        Subclass and override to inject custom behavior.
 | 
			
		||||
 | 
			
		||||
        Note that the first element will be removed from the output tuple.
 | 
			
		||||
        See: https://github.com/huggingface/transformers/blob/v4.39.1/src/transformers/trainer.py#L3777
 | 
			
		||||
        See: https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/trainer.py#L3842
 | 
			
		||||
        """
 | 
			
		||||
        # Compute rewards
 | 
			
		||||
        _, _, values = model(**inputs, output_hidden_states=True, return_dict=True)
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user