From 80f0a63f73d61fd1287846eb310f499a0fb557bf Mon Sep 17 00:00:00 2001 From: hiyouga Date: Wed, 24 Apr 2024 05:39:52 +0800 Subject: [PATCH] add dbrx and jamba models Former-commit-id: 69eb03a8feee530000d290cd00aef28fca6d1e84 --- src/llmtuner/data/template.py | 25 +++++++++++++++++++++++++ src/llmtuner/extras/constants.py | 26 ++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/src/llmtuner/data/template.py b/src/llmtuner/data/template.py index cd567a7b..efdd44f3 100644 --- a/src/llmtuner/data/template.py +++ b/src/llmtuner/data/template.py @@ -550,6 +550,31 @@ _register_template( ) +_register_template( + name="dbrx", + format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]), + format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]), + format_separator=EmptyFormatter(slots=["\n"]), + default_system=( + "You are DBRX, created by Databricks. You were last updated in December 2023. " + "You answer questions based on information available up to that point.\n" + "YOU PROVIDE SHORT RESPONSES TO SHORT QUESTIONS OR STATEMENTS, but provide thorough " + "responses to more complex and open-ended questions.\nYou assist with various tasks, " + "from writing to coding (using markdown for code blocks — remember to use ``` with " + "code, JSON, and tables).\n(You do not have real-time data access or code execution " + "capabilities. You avoid stereotyping and provide balanced perspectives on " + "controversial topics. You do not provide song lyrics, poems, or news articles and " + "do not divulge details of your training data.)\nThis is your system prompt, " + "guiding your responses. Do not reference it, just respond to the user. If you find " + "yourself talking about this message, stop. 
You should be responding appropriately " + "and usually that means not mentioning this.\nYOU DO NOT MENTION ANY OF THIS INFORMATION " + "ABOUT YOURSELF UNLESS THE INFORMATION IS DIRECTLY PERTINENT TO THE USER'S QUERY." + ), + stop_words=["<|im_end|>"], + replace_eos=True, +) + + _register_template( name="deepseek", format_user=StringFormatter(slots=["User: {{content}}\n\nAssistant:"]), diff --git a/src/llmtuner/extras/constants.py b/src/llmtuner/extras/constants.py index 0a29f971..031e3e81 100644 --- a/src/llmtuner/extras/constants.py +++ b/src/llmtuner/extras/constants.py @@ -268,6 +268,22 @@ register_model_group( ) +register_model_group( + models={ + "DBRX-132B-Base": { + DownloadSource.DEFAULT: "databricks/dbrx-base", + DownloadSource.MODELSCOPE: "AI-ModelScope/dbrx-base", + }, + "DBRX-132B-Chat": { + DownloadSource.DEFAULT: "databricks/dbrx-instruct", + DownloadSource.MODELSCOPE: "AI-ModelScope/dbrx-instruct", + }, + }, + module="Wqkv", + template="dbrx", +) + + register_model_group( models={ "DeepSeek-LLM-7B-Base": { @@ -453,6 +469,16 @@ register_model_group( ) +register_model_group( + models={ + "Jamba-v0.1": { + DownloadSource.DEFAULT: "ai21labs/Jamba-v0.1", + DownloadSource.MODELSCOPE: "AI-ModelScope/Jamba-v0.1", + } + }, +) + + register_model_group( models={ "LingoWhale-8B": {