From 6574a721d20108174fc770caf9f17cf8fa81c4b4 Mon Sep 17 00:00:00 2001 From: marko1616 Date: Sat, 13 Apr 2024 04:31:33 +0800 Subject: [PATCH 1/9] Add template&support(Not tested) --- src/llmtuner/data/template.py | 8 ++++++++ src/llmtuner/extras/constants.py | 10 ++++++++++ 2 files changed, 18 insertions(+) diff --git a/src/llmtuner/data/template.py b/src/llmtuner/data/template.py index 52358c1e..aefa3e45 100644 --- a/src/llmtuner/data/template.py +++ b/src/llmtuner/data/template.py @@ -785,3 +785,11 @@ _register_template( format_user=StringFormatter(slots=[":{{content}}\n:"]), format_separator=EmptyFormatter(slots=["\n"]), ) + +_register_template( + name="c4ai", + format_user=StringFormatter(slots=["<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{content}}", "<|END_OF_TURN_TOKEN|>", "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"]), + format_assistant=StringFormatter(slots=["{{content}}", "<|END_OF_TURN_TOKEN|>"]), + format_system=StringFormatter(slots=[{"bos_token"},"<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{content}}", "<|END_OF_TURN_TOKEN|>"]), + default_system="You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions.", +) \ No newline at end of file diff --git a/src/llmtuner/extras/constants.py b/src/llmtuner/extras/constants.py index 729e0fa6..0cd2f987 100644 --- a/src/llmtuner/extras/constants.py +++ b/src/llmtuner/extras/constants.py @@ -994,3 +994,13 @@ register_model_group( }, template="atom", ) + +register_model_group( + models={ + "C4AI-Command-R-35B": { + DownloadSource.DEFAULT: "CohereForAI/c4ai-command-r-v01", + DownloadSource.MODELSCOPE: "AI-ModelScope/c4ai-command-r-v01", + } + }, + template="c4ai", +) \ No newline at end of file From d0705518ee85408364172803ca3dd68e978e829e Mon Sep 17 00:00:00 2001 From: marko1616 Date: Sat, 13 Apr 2024 07:32:40 +0800 Subject: [PATCH 2/9] Add c4ai-command-r-plus link --- src/llmtuner/extras/constants.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/llmtuner/extras/constants.py b/src/llmtuner/extras/constants.py index 0cd2f987..cf68a225 100644 --- a/src/llmtuner/extras/constants.py +++ b/src/llmtuner/extras/constants.py @@ -1003,4 +1003,14 @@ register_model_group( } }, template="c4ai", +) + +register_model_group( + models={ + "C4AI-Command-R-plus-104B": { + DownloadSource.DEFAULT: "CohereForAI/c4ai-command-r-plus", + DownloadSource.MODELSCOPE: "AI-ModelScope/c4ai-command-r-plus", + } + }, + template="c4ai", ) \ No newline at end of file From 42806323f068e4857fce84682a9cb571c17b52d3 Mon Sep 17 00:00:00 2001 From: marko1616 Date: Sat, 13 Apr 2024 07:52:11 +0800 Subject: [PATCH 3/9] Typo fix --- src/llmtuner/data/template.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/llmtuner/data/template.py b/src/llmtuner/data/template.py index aefa3e45..f409cd9a 100644 --- a/src/llmtuner/data/template.py +++ b/src/llmtuner/data/template.py @@ -786,6 +786,7 @@ _register_template( format_separator=EmptyFormatter(slots=["\n"]), ) + _register_template( name="c4ai", format_user=StringFormatter(slots=["<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{content}}", "<|END_OF_TURN_TOKEN|>", "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"]), From ab033dac4fde392b18955984e40473a923f745af Mon Sep 17 00:00:00 2001 From: marko1616 Date: Sat, 13 Apr 2024 17:30:21 +0800 Subject: [PATCH 4/9] Typo fix --- src/llmtuner/extras/constants.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/llmtuner/extras/constants.py b/src/llmtuner/extras/constants.py index cf68a225..5b81a9a9 100644 --- a/src/llmtuner/extras/constants.py +++ b/src/llmtuner/extras/constants.py @@ -995,6 +995,7 @@ register_model_group( template="atom", ) + register_model_group( models={ "C4AI-Command-R-35B": { @@ -1005,6 +1006,7 @@ register_model_group( template="c4ai", ) + register_model_group( models={ "C4AI-Command-R-plus-104B": { From d7b9bbc8b956a4aeb16b5b33179687b0972ae9a6 Mon Sep 17 00:00:00 2001 From: marko1616 Date: Mon, 15 Apr 2024 20:16:52 +0800 Subject: [PATCH 5/9] Add support for function call(Not strictly following origin) --- src/llmtuner/data/template.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/llmtuner/data/template.py b/src/llmtuner/data/template.py index f409cd9a..b41d2642 100644 --- a/src/llmtuner/data/template.py +++ b/src/llmtuner/data/template.py @@ -792,5 +792,21 @@ _register_template( format_user=StringFormatter(slots=["<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{content}}", "<|END_OF_TURN_TOKEN|>", "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"]), format_assistant=StringFormatter(slots=["{{content}}", "<|END_OF_TURN_TOKEN|>"]), format_system=StringFormatter(slots=[{"bos_token"},"<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{content}}", "<|END_OF_TURN_TOKEN|>"]), - default_system="You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions.", + format_function=FunctionFormatter(slots=["{{name}}\n{{arguments}}"]), + format_observation=StringFormatter( + slots=["<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{content}}","<|END_OF_TURN_TOKEN|>","<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"] + ), + default_system=("# Safety Preamble\n", + "The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral.\n\n", + "# System Preamble\n", + "## Basic Rules\n", + "You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions.\n\n", + "# User Preamble\n", + "## Task and Context\n", + "You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging.\n\n", + "## Style Guide\n", + "Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling.\n" + "## Available Tools\n", + "Here is a list of tools that you have available to you:\n" + ) ) \ No newline at end of file From 90c5dddf9a34f7c504f2b404ee86027f08f15339 Mon Sep 17 00:00:00 2001 From: marko1616 Date: Mon, 15 Apr 2024 20:27:09 +0800 Subject: [PATCH 6/9] Revert "Add support for function call(Not strictly following origin)" This reverts commit d7b9bbc8b956a4aeb16b5b33179687b0972ae9a6. --- src/llmtuner/data/template.py | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/src/llmtuner/data/template.py b/src/llmtuner/data/template.py index b41d2642..f409cd9a 100644 --- a/src/llmtuner/data/template.py +++ b/src/llmtuner/data/template.py @@ -792,21 +792,5 @@ _register_template( format_user=StringFormatter(slots=["<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{content}}", "<|END_OF_TURN_TOKEN|>", "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"]), format_assistant=StringFormatter(slots=["{{content}}", "<|END_OF_TURN_TOKEN|>"]), format_system=StringFormatter(slots=[{"bos_token"},"<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{content}}", "<|END_OF_TURN_TOKEN|>"]), - format_function=FunctionFormatter(slots=["{{name}}\n{{arguments}}"]), - format_observation=StringFormatter( - slots=["<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{content}}","<|END_OF_TURN_TOKEN|>","<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"] - ), - default_system=("# Safety Preamble\n", - "The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral.\n\n", - "# System Preamble\n", - "## Basic Rules\n", - "You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions.\n\n", - "# User Preamble\n", - "## Task and Context\n", - "You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging.\n\n", - "## Style Guide\n", - "Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling.\n" - "## Available Tools\n", - "Here is a list of tools that you have available to you:\n" - ) + default_system="You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions.", ) \ No newline at end of file From 2c89b38720bc9105d20ca9ea121dabf165adddad Mon Sep 17 00:00:00 2001 From: marko1616 Date: Mon, 15 Apr 2024 20:45:46 +0800 Subject: [PATCH 7/9] change default_system accroding to official template --- src/llmtuner/data/template.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/llmtuner/data/template.py b/src/llmtuner/data/template.py index f409cd9a..d85adcf3 100644 --- a/src/llmtuner/data/template.py +++ b/src/llmtuner/data/template.py @@ -789,8 +789,20 @@ _register_template( _register_template( name="c4ai", - format_user=StringFormatter(slots=["<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{content}}", "<|END_OF_TURN_TOKEN|>", "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"]), + format_user=StringFormatter(slots=["<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{content}}", "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"]), format_assistant=StringFormatter(slots=["{{content}}", "<|END_OF_TURN_TOKEN|>"]), format_system=StringFormatter(slots=[{"bos_token"},"<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{content}}", "<|END_OF_TURN_TOKEN|>"]), - default_system="You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions.", + format_observation=StringFormatter(slots=["<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{content}}", "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"]), + default_system=( + "# Safety Preamble\n", + "The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral.\n\n", + "# System Preamble\n", + "## Basic Rules\n", + "You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions.\n\n", + "# User Preamble\n", + "## Task and Context\n", + "You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging.\n\n", + "## Style Guide\n", + "Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling." + ) ) \ No newline at end of file From 268f53dddbda5905859c6facfce90e90736c6f7d Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Mon, 15 Apr 2024 22:56:55 +0800 Subject: [PATCH 8/9] Update constants.py --- src/llmtuner/extras/constants.py | 44 ++++++++++++++++---------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/src/llmtuner/extras/constants.py b/src/llmtuner/extras/constants.py index 5b81a9a9..321c36a4 100644 --- a/src/llmtuner/extras/constants.py +++ b/src/llmtuner/extras/constants.py @@ -242,6 +242,28 @@ register_model_group( ) +register_model_group( + models={ + "CommandR-35B-Chat": { + DownloadSource.DEFAULT: "CohereForAI/c4ai-command-r-v01", + DownloadSource.MODELSCOPE: "AI-ModelScope/c4ai-command-r-v01", + }, + "CommandR-Plus-104B-Chat": { + DownloadSource.DEFAULT: "CohereForAI/c4ai-command-r-plus", + DownloadSource.MODELSCOPE: "AI-ModelScope/c4ai-command-r-plus", + }, + "CommandR-35B-4bit-Chat": { + DownloadSource.DEFAULT: "CohereForAI/c4ai-command-r-v01-4bit", + DownloadSource.MODELSCOPE: "mirror013/c4ai-command-r-v01-4bit", + }, + "CommandR-Plus-104B-4bit-Chat": { + DownloadSource.DEFAULT: "CohereForAI/c4ai-command-r-plus-4bit", + }, + }, + template="cohere", +) + + register_model_group( models={ "DeepSeek-LLM-7B-Base": { @@ -994,25 +1016,3 @@ register_model_group( }, template="atom", ) - - -register_model_group( - models={ - "C4AI-Command-R-35B": { - DownloadSource.DEFAULT: "CohereForAI/c4ai-command-r-v01", - DownloadSource.MODELSCOPE: "AI-ModelScope/c4ai-command-r-v01", - } - }, - template="c4ai", -) - - -register_model_group( - models={ - "C4AI-Command-R-plus-104B": { - DownloadSource.DEFAULT: "CohereForAI/c4ai-command-r-plus", - DownloadSource.MODELSCOPE: "AI-ModelScope/c4ai-command-r-plus", - } - }, - template="c4ai", -) \ No newline at end of file From 3ccf0d097708d1d96e52d820599fe74700b935c4 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Mon, 15 Apr 2024 22:58:01 +0800 Subject: [PATCH 9/9] Update template.py --- src/llmtuner/data/template.py | 36 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/src/llmtuner/data/template.py b/src/llmtuner/data/template.py index d85adcf3..7a1f4ab8 100644 --- a/src/llmtuner/data/template.py +++ b/src/llmtuner/data/template.py @@ -526,6 +526,21 @@ _register_template( ) +_register_template( + name="cohere", + format_user=StringFormatter( + slots=[ + ( + "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{content}}<|END_OF_TURN_TOKEN|>" + "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>" + ) + ] + ), + format_system=EmptyFormatter(slots=[{"bos_token"}]), + force_system=True, +) + + _register_template( name="cpm", format_user=StringFormatter(slots=["<用户>{{content}}"]), @@ -785,24 +800,3 @@ _register_template( format_user=StringFormatter(slots=[":{{content}}\n:"]), format_separator=EmptyFormatter(slots=["\n"]), ) - - -_register_template( - name="c4ai", - format_user=StringFormatter(slots=["<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{content}}", "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"]), - format_assistant=StringFormatter(slots=["{{content}}", "<|END_OF_TURN_TOKEN|>"]), - format_system=StringFormatter(slots=[{"bos_token"},"<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{content}}", "<|END_OF_TURN_TOKEN|>"]), - format_observation=StringFormatter(slots=["<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{content}}", "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"]), - default_system=( - "# Safety Preamble\n", - "The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral.\n\n", - "# System Preamble\n", - "## Basic Rules\n", - "You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions.\n\n", - "# User Preamble\n", - "## Task and Context\n", - "You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging.\n\n", - "## Style Guide\n", - "Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling." - ) -) \ No newline at end of file