mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-08-06 21:52:50 +08:00
add dbrx and jamba models
Former-commit-id: 69eb03a8feee530000d290cd00aef28fca6d1e84
This commit is contained in:
parent
7d89abb1fd
commit
80f0a63f73
@ -550,6 +550,31 @@ _register_template(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Chat template registered under the name "dbrx". Turns are wrapped in
# <|im_start|>{role} ... <|im_end|> markers.
_register_template(
    name="dbrx",
    # The user slot closes the user turn and immediately opens the assistant turn.
    format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
    format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
    format_separator=EmptyFormatter(slots=["\n"]),
    # Adjacent string literals below are concatenated into one system prompt;
    # the text (including the em dash and the embedded "\n" breaks) must stay verbatim.
    default_system=(
        "You are DBRX, created by Databricks. You were last updated in December 2023. "
        "You answer questions based on information available up to that point.\n"
        "YOU PROVIDE SHORT RESPONSES TO SHORT QUESTIONS OR STATEMENTS, but provide thorough "
        "responses to more complex and open-ended questions.\nYou assist with various tasks, "
        "from writing to coding (using markdown for code blocks — remember to use ``` with "
        "code, JSON, and tables).\n(You do not have real-time data access or code execution "
        "capabilities. You avoid stereotyping and provide balanced perspectives on "
        "controversial topics. You do not provide song lyrics, poems, or news articles and "
        "do not divulge details of your training data.)\nThis is your system prompt, "
        "guiding your responses. Do not reference it, just respond to the user. If you find "
        "yourself talking about this message, stop. You should be responding appropriately "
        "and usually that means not mentioning this.\nYOU DO NOT MENTION ANY OF THIS INFORMATION "
        "ABOUT YOURSELF UNLESS THE INFORMATION IS DIRECTLY PERTINENT TO THE USER'S QUERY."
    ),
    # The end-of-turn marker is passed as a stop word.
    # NOTE(review): replace_eos presumably swaps the tokenizer EOS for it — confirm in _register_template.
    stop_words=["<|im_end|>"],
    replace_eos=True,
)
|
||||||
|
|
||||||
|
|
||||||
_register_template(
|
_register_template(
|
||||||
name="deepseek",
|
name="deepseek",
|
||||||
format_user=StringFormatter(slots=["User: {{content}}\n\nAssistant:"]),
|
format_user=StringFormatter(slots=["User: {{content}}\n\nAssistant:"]),
|
||||||
|
@ -268,6 +268,22 @@ register_model_group(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# DBRX base and chat checkpoints, each mapped to its repo id per download source.
register_model_group(
    models={
        "DBRX-132B-Base": {
            DownloadSource.DEFAULT: "databricks/dbrx-base",
            DownloadSource.MODELSCOPE: "AI-ModelScope/dbrx-base",
        },
        "DBRX-132B-Chat": {
            DownloadSource.DEFAULT: "databricks/dbrx-instruct",
            DownloadSource.MODELSCOPE: "AI-ModelScope/dbrx-instruct",
        },
    },
    # NOTE(review): presumably the default module name targeted for adapters — confirm against register_model_group.
    module="Wqkv",
    # Refers to the template registered with name="dbrx".
    template="dbrx",
)
|
||||||
|
|
||||||
|
|
||||||
register_model_group(
|
register_model_group(
|
||||||
models={
|
models={
|
||||||
"DeepSeek-LLM-7B-Base": {
|
"DeepSeek-LLM-7B-Base": {
|
||||||
@ -453,6 +469,16 @@ register_model_group(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Jamba v0.1 checkpoint, mapped to its repo id per download source.
# No module/template argument is passed, so register_model_group's defaults apply.
register_model_group(
    models={
        # Display name fixed from the misspelled "Jambda-v0.1" so it matches the repo ids below.
        "Jamba-v0.1": {
            DownloadSource.DEFAULT: "ai21labs/Jamba-v0.1",
            DownloadSource.MODELSCOPE: "AI-ModelScope/Jamba-v0.1",
        },
    },
)
|
||||||
|
|
||||||
|
|
||||||
register_model_group(
|
register_model_group(
|
||||||
models={
|
models={
|
||||||
"LingoWhale-8B": {
|
"LingoWhale-8B": {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user