From 22f71c152ac3b8e92cfa3530d4727b3d25298cf8 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Fri, 17 May 2024 00:08:33 +0800 Subject: [PATCH] add falcon 11b Former-commit-id: d77bed4091a6a8fea682b39d3261e1e93dfe093f --- README.md | 2 +- README_zh.md | 2 +- src/llamafactory/extras/constants.py | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d392b19b..a41415fd 100644 --- a/README.md +++ b/README.md @@ -149,7 +149,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ | [ChatGLM3](https://huggingface.co/THUDM) | 6B | query_key_value | chatglm3 | | [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | q_proj,v_proj | cohere | | [DeepSeek (MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | q_proj,v_proj | deepseek | -| [Falcon](https://huggingface.co/tiiuae) | 7B/40B/180B | query_key_value | falcon | +| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | query_key_value | falcon | | [Gemma/CodeGemma](https://huggingface.co/google) | 2B/7B | q_proj,v_proj | gemma | | [InternLM2](https://huggingface.co/internlm) | 7B/20B | wqkv | intern2 | | [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | q_proj,v_proj | - | diff --git a/README_zh.md b/README_zh.md index 58398a31..4f8ffa28 100644 --- a/README_zh.md +++ b/README_zh.md @@ -149,7 +149,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd | [ChatGLM3](https://huggingface.co/THUDM) | 6B | query_key_value | chatglm3 | | [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | q_proj,v_proj | cohere | | [DeepSeek (MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | q_proj,v_proj | deepseek | -| [Falcon](https://huggingface.co/tiiuae) | 7B/40B/180B | query_key_value | falcon | +| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | query_key_value | falcon | | [Gemma/CodeGemma](https://huggingface.co/google) | 2B/7B | q_proj,v_proj | gemma | | [InternLM2](https://huggingface.co/internlm) | 7B/20B | wqkv | intern2 | | [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | q_proj,v_proj | - | diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index f1ee55a0..40826163 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -375,6 +375,9 @@ register_model_group( DownloadSource.DEFAULT: "tiiuae/falcon-7b", DownloadSource.MODELSCOPE: "AI-ModelScope/falcon-7b", }, + "Falcon-11B": { + DownloadSource.DEFAULT: "tiiuae/falcon-11B", + }, "Falcon-40B": { DownloadSource.DEFAULT: "tiiuae/falcon-40b", DownloadSource.MODELSCOPE: "AI-ModelScope/falcon-40b",