Mirror of https://github.com/hiyouga/LLaMA-Factory.git (synced 2025-08-22 13:42:51 +08:00)

Commit 3d6a80660e (parent c627d358a9): support Yi 1.5
Former-commit-id: d12b8f866aa51e5e22d2b3d29704a13308de3e5b

This commit updates the English and Chinese READMEs for Yi 1/1.5, registers the Yi-1.5 base and chat checkpoints, adds the Llama3-70B-Chinese-Chat and DeepSeek-MoE-236B-Base entries, bumps the CodeGemma default repo ids, and raises the pinned transformers version used by the LLaMA attention patch.
English README (supported models table):

@@ -161,7 +161,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 | [Qwen1.5 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/32B/72B/110B | q_proj,v_proj | qwen |
 | [StarCoder2](https://huggingface.co/bigcode) | 3B/7B/15B | q_proj,v_proj | - |
 | [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | q_proj,v_proj | xverse |
-| [Yi](https://huggingface.co/01-ai) | 6B/9B/34B | q_proj,v_proj | yi |
+| [Yi (1/1.5)](https://huggingface.co/01-ai) | 6B/9B/34B | q_proj,v_proj | yi |
 | [Yuan](https://huggingface.co/IEITYuan) | 2B/51B/102B | q_proj,v_proj | yuan |

 > [!NOTE]
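In this table the third column lists the default LoRA target modules and the fourth column the chat template name for each model family. As a hedged illustration only (not part of this commit, and not LLaMA-Factory's own training entry point), the sketch below shows what the `q_proj,v_proj` default for Yi corresponds to when building a LoRA adapter directly with PEFT; the rank and alpha values are arbitrary placeholders.

```python
# Hedged sketch: what the "q_proj,v_proj" default-module column maps to when
# configuring a LoRA adapter with PEFT directly (illustration only).
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("01-ai/Yi-1.5-6B")  # repo id registered in this commit
lora_config = LoraConfig(
    r=8,                                  # placeholder rank
    lora_alpha=16,                        # placeholder scaling
    target_modules=["q_proj", "v_proj"],  # the "default module" column for Yi
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()
```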
English README (licensing):

@@ -487,7 +487,7 @@ If you have a project that should be incorporated, please contact via email or c

 This repository is licensed under the [Apache-2.0 License](LICENSE).

-Please follow the model licenses to use the corresponding model weights: [Baichuan2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command-R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [InternLM2](https://github.com/InternLM/InternLM#license) / [LLaMA](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [LLaMA-2/LLaVA-1.5](https://ai.meta.com/llama/license/) / [LLaMA-3](https://llama.meta.com/llama3/license/) / [Mistral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [StarCoder2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yuan](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan)
+Please follow the model licenses to use the corresponding model weights: [Baichuan2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command-R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [InternLM2](https://github.com/InternLM/InternLM#license) / [LLaMA](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [LLaMA-2 (LLaVA-1.5)](https://ai.meta.com/llama/license/) / [LLaMA-3](https://llama.meta.com/llama3/license/) / [Mistral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [StarCoder2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan)

 ## Citation
Chinese README (supported models table):

@@ -161,7 +161,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd
 | [Qwen1.5 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/32B/72B/110B | q_proj,v_proj | qwen |
 | [StarCoder2](https://huggingface.co/bigcode) | 3B/7B/15B | q_proj,v_proj | - |
 | [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | q_proj,v_proj | xverse |
-| [Yi](https://huggingface.co/01-ai) | 6B/9B/34B | q_proj,v_proj | yi |
+| [Yi (1/1.5)](https://huggingface.co/01-ai) | 6B/9B/34B | q_proj,v_proj | yi |
 | [Yuan](https://huggingface.co/IEITYuan) | 2B/51B/102B | q_proj,v_proj | yuan |

 > [!NOTE]
Chinese README (licensing, mirroring the English change above):

@@ -487,7 +487,7 @@ export USE_MODELSCOPE_HUB=1 # Windows 使用 `set USE_MODELSCOPE_HUB=1`

 本仓库的代码依照 [Apache-2.0](LICENSE) 协议开源。

-使用模型权重时，请遵循对应的模型协议：[Baichuan2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command-R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [InternLM2](https://github.com/InternLM/InternLM#license) / [LLaMA](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [LLaMA-2/LLaVA-1.5](https://ai.meta.com/llama/license/) / [LLaMA-3](https://llama.meta.com/llama3/license/) / [Mistral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [StarCoder2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yuan](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan)
+使用模型权重时，请遵循对应的模型协议：[Baichuan2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command-R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [InternLM2](https://github.com/InternLM/InternLM#license) / [LLaMA](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [LLaMA-2 (LLaVA-1.5)](https://ai.meta.com/llama/license/) / [LLaMA-3](https://llama.meta.com/llama3/license/) / [Mistral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [StarCoder2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan)

 ## 引用
Model constants (register_model_group), DeepSeek-MoE entries: the 236B base checkpoint is renamed to "DeepSeek-MoE-236B-Base" and moved next to the other base entries.

@@ -320,14 +320,14 @@ register_model_group(
             DownloadSource.DEFAULT: "deepseek-ai/deepseek-moe-16b-base",
             DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-moe-16b-base",
         },
+        "DeepSeek-MoE-236B-Base": {
+            DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-V2",
+            DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-V2",
+        },
         "DeepSeek-MoE-16B-Chat": {
             DownloadSource.DEFAULT: "deepseek-ai/deepseek-moe-16b-chat",
             DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-moe-16b-chat",
         },
-        "DeepSeek-MoE-236B": {
-            DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-V2",
-            DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-V2",
-        },
         "DeepSeek-MoE-236B-Chat": {
             DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-V2-Chat",
             DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-V2-Chat",
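For orientation, each constants hunk in this commit edits a `register_model_group(models={...}, template=...)` call in which a display name maps to per-hub repo ids keyed by `DownloadSource`. The sketch below is a hedged, self-contained approximation of that registry pattern with simplified stand-in types; it mirrors the names visible in the diff but is not the project's actual implementation.

```python
# Hedged approximation of the registry pattern edited above (stand-in types only).
from enum import Enum
from typing import Dict, Optional

class DownloadSource(str, Enum):
    DEFAULT = "hf"      # Hugging Face Hub
    MODELSCOPE = "ms"   # ModelScope

SUPPORTED_MODELS: Dict[str, Dict[DownloadSource, str]] = {}
DEFAULT_TEMPLATE: Dict[str, str] = {}

def register_model_group(
    models: Dict[str, Dict[DownloadSource, str]],
    template: Optional[str] = None,
) -> None:
    """Record every checkpoint in the group and its shared chat template."""
    for name, sources in models.items():
        SUPPORTED_MODELS[name] = sources
        if template is not None:
            DEFAULT_TEMPLATE[name] = template

# Example using the entry added in the hunk above; the real group also
# carries a chat template, as the later Yi and LLaMA-3 hunks show.
register_model_group(
    models={
        "DeepSeek-MoE-236B-Base": {
            DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-V2",
            DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-V2",
        },
    },
)
```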
Model constants (register_model_group), CodeGemma entries: the default repo ids for the 2B base and 7B chat checkpoints now point to the 1.1 releases.

@@ -424,13 +424,13 @@ register_model_group(
 register_model_group(
     models={
         "CodeGemma-2B": {
-            DownloadSource.DEFAULT: "google/codegemma-2b",
+            DownloadSource.DEFAULT: "google/codegemma-1.1-2b",
         },
         "CodeGemma-7B": {
             DownloadSource.DEFAULT: "google/codegemma-7b",
         },
         "CodeGemma-7B-Chat": {
-            DownloadSource.DEFAULT: "google/codegemma-7b-it",
+            DownloadSource.DEFAULT: "google/codegemma-1.1-7b-it",
             DownloadSource.MODELSCOPE: "AI-ModelScope/codegemma-7b-it",
         },
     },
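When bumping default repo ids like this, it can be worth confirming that the new ids actually resolve on the Hugging Face Hub. A hedged sketch of such a check follows, assuming `huggingface_hub` is installed; CodeGemma is a gated model, so an access token may be required and the call may raise for unauthenticated users.

```python
# Hedged sketch: verify that the bumped default repo ids resolve on the Hub.
from huggingface_hub import model_info

for repo_id in ("google/codegemma-1.1-2b", "google/codegemma-1.1-7b-it"):
    info = model_info(repo_id)  # raises if the repo id does not exist or is inaccessible
    print(repo_id, "->", info.sha)
```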
Model constants (register_model_group), LLaMA-3 Chinese Chat entries: the 70B checkpoint is added.

@@ -581,6 +581,9 @@ register_model_group(
             DownloadSource.DEFAULT: "shenzhi-wang/Llama3-8B-Chinese-Chat",
             DownloadSource.MODELSCOPE: "LLM-Research/Llama3-8B-Chinese-Chat",
         },
+        "LLaMA3-70B-Chinese-Chat": {
+            DownloadSource.DEFAULT: "shenzhi-wang/Llama3-70B-Chinese-Chat",
+        },
     },
     template="llama3",
 )
Model constants (register_model_group), Yi entries: the Yi-1.5 base and chat checkpoints are added to the existing Yi group, which keeps the `yi` template.

@@ -1174,6 +1177,30 @@ register_model_group(
             DownloadSource.DEFAULT: "01-ai/Yi-34B-Chat-4bits",
             DownloadSource.MODELSCOPE: "01ai/Yi-34B-Chat-4bits",
         },
+        "Yi-1.5-6B": {
+            DownloadSource.DEFAULT: "01-ai/Yi-1.5-6B",
+            DownloadSource.MODELSCOPE: "01ai/Yi-1.5-6B",
+        },
+        "Yi-1.5-9B": {
+            DownloadSource.DEFAULT: "01-ai/Yi-1.5-9B",
+            DownloadSource.MODELSCOPE: "01ai/Yi-1.5-9B",
+        },
+        "Yi-1.5-34B": {
+            DownloadSource.DEFAULT: "01-ai/Yi-1.5-34B",
+            DownloadSource.MODELSCOPE: "01ai/Yi-1.5-34B",
+        },
+        "Yi-1.5-6B-Chat": {
+            DownloadSource.DEFAULT: "01-ai/Yi-1.5-6B-Chat",
+            DownloadSource.MODELSCOPE: "01ai/Yi-1.5-6B-Chat",
+        },
+        "Yi-1.5-9B-Chat": {
+            DownloadSource.DEFAULT: "01-ai/Yi-1.5-9B-Chat",
+            DownloadSource.MODELSCOPE: "01ai/Yi-1.5-9B-Chat",
+        },
+        "Yi-1.5-34B-Chat": {
+            DownloadSource.DEFAULT: "01-ai/Yi-1.5-34B-Chat",
+            DownloadSource.MODELSCOPE: "01ai/Yi-1.5-34B-Chat",
+        },
     },
     template="yi",
 )
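Outside of LLaMA-Factory, the newly registered default repo ids can be used directly with plain transformers. The sketch below is a hedged illustration only, assuming the chat checkpoint ships a tokenizer chat template and that accelerate is available for `device_map="auto"`.

```python
# Hedged usage sketch: load one of the Yi-1.5 chat checkpoints registered above
# with plain transformers (not via LLaMA-Factory).
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "01-ai/Yi-1.5-6B-Chat"  # DownloadSource.DEFAULT for "Yi-1.5-6B-Chat"
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id, torch_dtype="auto", device_map="auto")

messages = [{"role": "user", "content": "Who are you?"}]
inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
outputs = model.generate(inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True))
```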
LLaMA attention patch: the pinned transformers version is raised from 4.40.1 to 4.40.2.

@@ -302,7 +302,7 @@ def llama_sdpa_attention_forward(


 def _apply_llama_patch() -> None:
-    require_version("transformers==4.40.1", "To fix: pip install transformers==4.40.1")
+    require_version("transformers==4.40.2", "To fix: pip install transformers==4.40.2")
     LlamaAttention.forward = llama_attention_forward
     LlamaFlashAttention2.forward = llama_flash_attention_2_forward
     LlamaSdpaAttention.forward = llama_sdpa_attention_forward
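The patch function replaces the attention `forward` methods of the transformers LLaMA classes with this repository's modified implementations, which is why it pins an exact transformers release. Below is a minimal stand-in for the same guard-then-patch pattern; the real `llama_attention_forward`, `llama_flash_attention_2_forward`, and `llama_sdpa_attention_forward` are defined elsewhere in this file, so a pass-through wrapper takes their place here purely for illustration.

```python
# Minimal stand-in for the guard-then-patch pattern shown above.
from transformers.models.llama.modeling_llama import LlamaAttention
from transformers.utils.versions import require_version

_original_forward = LlamaAttention.forward

def _wrapped_attention_forward(self, *args, **kwargs):
    # A real patch swaps in a modified attention implementation;
    # this wrapper simply delegates to the stock one.
    return _original_forward(self, *args, **kwargs)

def apply_patch() -> None:
    # The replacement forward mirrors internals of a specific transformers
    # release, so the exact pin is checked before monkey-patching.
    require_version("transformers==4.40.2", "To fix: pip install transformers==4.40.2")
    LlamaAttention.forward = _wrapped_attention_forward
```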