From 0ba72273d2259da2c25c0a357fb5951320dca440 Mon Sep 17 00:00:00 2001 From: hiyouga Date: Wed, 6 Sep 2023 18:40:11 +0800 Subject: [PATCH] add Baichuan2 models Former-commit-id: 60603a94c667fda5066af742ae1394dadce7a784 --- README.md | 2 ++ README_zh.md | 2 ++ 2 files changed, 4 insertions(+) diff --git a/README.md b/README.md index c8e71e9c..aab67c01 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,7 @@ | [BLOOMZ](https://huggingface.co/bigscience/bloomz) | 560M/1.1B/1.7B/3B/7.1B/176B | query_key_value | - | | [Falcon](https://huggingface.co/tiiuae/falcon-7b) | 7B/40B | query_key_value | - | | [Baichuan](https://github.com/baichuan-inc/baichuan-13B) | 7B/13B | W_pack | baichuan | +| [Baichuan2](https://github.com/baichuan-inc/Baichuan2) | 7B/13B | W_pack | baichuan | | [InternLM](https://github.com/InternLM/InternLM) | 7B | q_proj,v_proj | intern | | [Qwen](https://github.com/QwenLM/Qwen-7B) | 7B | c_attn | chatml | | [XVERSE](https://github.com/xverse-ai/XVERSE-13B) | 13B | q_proj,v_proj | xverse | @@ -462,6 +463,7 @@ Please follow the model licenses to use the corresponding model weights: - [BLOOM](https://huggingface.co/spaces/bigscience/license) - [Falcon](LICENSE) - [Baichuan](https://huggingface.co/baichuan-inc/baichuan-7B/resolve/main/baichuan-7B%20%E6%A8%A1%E5%9E%8B%E8%AE%B8%E5%8F%AF%E5%8D%8F%E8%AE%AE.pdf) +- [Baichuan2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/resolve/main/Baichuan%202%E6%A8%A1%E5%9E%8B%E7%A4%BE%E5%8C%BA%E8%AE%B8%E5%8F%AF%E5%8D%8F%E8%AE%AE.pdf) - [InternLM](https://github.com/InternLM/InternLM#open-source-license) - [Qwen](https://huggingface.co/Qwen/Qwen-7B-Chat/blob/main/LICENSE) - [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) diff --git a/README_zh.md b/README_zh.md index 212016b0..0ca5c8b8 100644 --- a/README_zh.md +++ b/README_zh.md @@ -56,6 +56,7 @@ | [BLOOMZ](https://huggingface.co/bigscience/bloomz) | 560M/1.1B/1.7B/3B/7.1B/176B | query_key_value | - | | [Falcon](https://huggingface.co/tiiuae/falcon-7b) | 7B/40B | query_key_value | - | | [Baichuan](https://github.com/baichuan-inc/baichuan-13B) | 7B/13B | W_pack | baichuan | +| [Baichuan2](https://github.com/baichuan-inc/Baichuan2) | 7B/13B | W_pack | baichuan | | [InternLM](https://github.com/InternLM/InternLM) | 7B | q_proj,v_proj | intern | | [Qwen](https://github.com/QwenLM/Qwen-7B) | 7B | c_attn | chatml | | [XVERSE](https://github.com/xverse-ai/XVERSE-13B) | 13B | q_proj,v_proj | xverse | @@ -461,6 +462,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \ - [BLOOM](https://huggingface.co/spaces/bigscience/license) - [Falcon](LICENSE) - [Baichuan](https://huggingface.co/baichuan-inc/baichuan-7B/resolve/main/baichuan-7B%20%E6%A8%A1%E5%9E%8B%E8%AE%B8%E5%8F%AF%E5%8D%8F%E8%AE%AE.pdf) +- [Baichuan2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/resolve/main/Baichuan%202%E6%A8%A1%E5%9E%8B%E7%A4%BE%E5%8C%BA%E8%AE%B8%E5%8F%AF%E5%8D%8F%E8%AE%AE.pdf) - [InternLM](https://github.com/InternLM/InternLM#open-source-license) - [Qwen](https://huggingface.co/Qwen/Qwen-7B-Chat/blob/main/LICENSE) - [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf)