From 9771acfd750285a3303279310dfab8c7462a7ed2 Mon Sep 17 00:00:00 2001 From: hiyouga Date: Sun, 14 Jan 2024 00:14:49 +0800 Subject: [PATCH] support deepseek moe Former-commit-id: ca3933dc5295bd8d9e5e37ce869ff8fb44761047 --- README.md | 1 + src/llmtuner/extras/constants.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/README.md b/README.md index 1b804bcc..a3e4d78c 100644 --- a/README.md +++ b/README.md @@ -100,6 +100,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ | [BLOOM](https://huggingface.co/bigscience/bloom) | 560M/1.1B/1.7B/3B/7.1B/176B | query_key_value | - | | [BLOOMZ](https://huggingface.co/bigscience/bloomz) | 560M/1.1B/1.7B/3B/7.1B/176B | query_key_value | - | | [ChatGLM3](https://huggingface.co/THUDM/chatglm3-6b) | 6B | query_key_value | chatglm3 | +| [Deepseek (MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B | q_proj,v_proj | deepseek | | [Falcon](https://huggingface.co/tiiuae) | 7B/40B/180B | query_key_value | falcon | | [InternLM](https://huggingface.co/internlm) | 7B/20B | q_proj,v_proj | intern | | [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | q_proj,v_proj | - | diff --git a/src/llmtuner/extras/constants.py b/src/llmtuner/extras/constants.py index 113f6a10..029fb76d 100644 --- a/src/llmtuner/extras/constants.py +++ b/src/llmtuner/extras/constants.py @@ -269,6 +269,21 @@ register_model_group( ) +register_model_group( + models={ + "DeepseekMoE-16B-Base": { + DownloadSource.DEFAULT: "deepseek-ai/deepseek-moe-16b-base", + DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-moe-16b-base" + }, + "DeepseekMoE-16B-Chat": { + DownloadSource.DEFAULT: "deepseek-ai/deepseek-moe-16b-chat", + DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-moe-16b-chat" + } + }, + template="deepseek" +) + + register_model_group( models={ "Falcon-7B": {