From 9debd64cef2e1e24b6dd8c944d349b6ec58a3942 Mon Sep 17 00:00:00 2001
From: hiyouga
Date: Tue, 6 Feb 2024 14:57:23 +0800
Subject: [PATCH] add models

Former-commit-id: 85622ae757e2ffe7f3da15f0a9123e8410d82b28
---
 src/llmtuner/data/template.py    |  3 ++-
 src/llmtuner/extras/constants.py | 33 ++++++++++++++------------------
 2 files changed, 16 insertions(+), 20 deletions(-)

diff --git a/src/llmtuner/data/template.py b/src/llmtuner/data/template.py
index 03f67a5b..cf4c6d53 100644
--- a/src/llmtuner/data/template.py
+++ b/src/llmtuner/data/template.py
@@ -376,7 +376,8 @@ register_template(
 
 register_template(
     name="deepseekcoder",
-    format_user=StringFormatter(slots=["### Instruction:\n{{content}}\n### Response:\n"]),
+    format_user=StringFormatter(slots=["### Instruction:\n{{content}}\n### Response:"]),
+    format_assistant=StringFormatter(slots=["\n", "{{content}}"]),
     format_separator=EmptyFormatter(slots=["\n", {"token": "<|EOT|>"}, "\n"]),
     default_system=(
         "You are an AI programming assistant, utilizing the Deepseek Coder model, "
diff --git a/src/llmtuner/extras/constants.py b/src/llmtuner/extras/constants.py
index 73a90a84..cfa69730 100644
--- a/src/llmtuner/extras/constants.py
+++ b/src/llmtuner/extras/constants.py
@@ -219,22 +219,32 @@ register_model_group(
 
 register_model_group(
     models={
-        "DeepSeekLLM-7B-Base": {
+        "DeepSeek-LLM-7B-Base": {
             DownloadSource.DEFAULT: "deepseek-ai/deepseek-llm-7b-base",
             DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-llm-7b-base",
         },
-        "DeepSeekLLM-67B-Base": {
+        "DeepSeek-LLM-67B-Base": {
             DownloadSource.DEFAULT: "deepseek-ai/deepseek-llm-67b-base",
             DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-llm-67b-base",
         },
-        "DeepSeekLLM-7B-Chat": {
+        "DeepSeek-LLM-7B-Chat": {
             DownloadSource.DEFAULT: "deepseek-ai/deepseek-llm-7b-chat",
             DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-llm-7b-chat",
         },
-        "DeepSeekLLM-67B-Chat": {
+        "DeepSeek-LLM-67B-Chat": {
             DownloadSource.DEFAULT: "deepseek-ai/deepseek-llm-67b-chat",
             DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-llm-67b-chat",
         },
+        "DeepSeek-Math-7B-Base": {DownloadSource.DEFAULT: "deepseek-ai/deepseek-math-7b-base"},
+        "DeepSeek-Math-7B-Chat": {DownloadSource.DEFAULT: "deepseek-ai/deepseek-math-7b-instruct"},
+        "DeepSeek-MoE-16B-Base": {
+            DownloadSource.DEFAULT: "deepseek-ai/deepseek-moe-16b-base",
+            DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-moe-16b-base",
+        },
+        "DeepSeek-MoE-16B-Chat": {
+            DownloadSource.DEFAULT: "deepseek-ai/deepseek-moe-16b-chat",
+            DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-moe-16b-chat",
+        },
     },
     template="deepseek",
 )
@@ -265,21 +275,6 @@ register_model_group(
 )
 
 
-register_model_group(
-    models={
-        "DeepSeekMoE-16B-Base": {
-            DownloadSource.DEFAULT: "deepseek-ai/deepseek-moe-16b-base",
-            DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-moe-16b-base",
-        },
-        "DeepSeekMoE-16B-Chat": {
-            DownloadSource.DEFAULT: "deepseek-ai/deepseek-moe-16b-chat",
-            DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-moe-16b-chat",
-        },
-    },
-    template="deepseek",
-)
-
-
 register_model_group(
     models={
         "Falcon-7B": {