mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-09-20 03:52:49 +08:00
Compare commits
5 Commits
59f2bf1ea3
...
28a625bf5b
Author | SHA1 | Date | |
---|---|---|---|
|
28a625bf5b | ||
|
5d89af9e58 | ||
|
cf48406d07 | ||
|
b95c11d8ea | ||
|
aff6923fd1 |
3
.github/workflows/tests.yml
vendored
3
.github/workflows/tests.yml
vendored
@ -40,6 +40,9 @@ jobs:
|
|||||||
- python: "3.9"
|
- python: "3.9"
|
||||||
os: "ubuntu-latest"
|
os: "ubuntu-latest"
|
||||||
transformers: "4.51.0"
|
transformers: "4.51.0"
|
||||||
|
- python: "3.9"
|
||||||
|
os: "ubuntu-latest"
|
||||||
|
transformers: "4.53.0"
|
||||||
|
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
|
|
||||||
|
@ -276,7 +276,7 @@ Choose your path:
|
|||||||
| [Gemma 3/Gemma 3n](https://huggingface.co/google) | 270M/1B/4B/6B/8B/12B/27B | gemma3/gemma3n |
|
| [Gemma 3/Gemma 3n](https://huggingface.co/google) | 270M/1B/4B/6B/8B/12B/27B | gemma3/gemma3n |
|
||||||
| [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/zai-org) | 9B/32B | glm4/glmz1 |
|
| [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/zai-org) | 9B/32B | glm4/glmz1 |
|
||||||
| [GLM-4.1V](https://huggingface.co/zai-org) | 9B | glm4v |
|
| [GLM-4.1V](https://huggingface.co/zai-org) | 9B | glm4v |
|
||||||
| [GLM-4.5/GLM-4.5V](https://huggingface.co/zai-org)* | 106B/355B | glm4_moe/glm4v_moe |
|
| [GLM-4.5/GLM-4.5V](https://huggingface.co/zai-org) | 106B/355B | glm4_moe/glm4v_moe |
|
||||||
| [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - |
|
| [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - |
|
||||||
| [GPT-OSS](https://huggingface.co/openai) | 20B/120B | gpt |
|
| [GPT-OSS](https://huggingface.co/openai) | 20B/120B | gpt |
|
||||||
| [Granite 3.0-3.3](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 |
|
| [Granite 3.0-3.3](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 |
|
||||||
@ -296,7 +296,7 @@ Choose your path:
|
|||||||
| [LLaVA-NeXT](https://huggingface.co/llava-hf) | 7B/8B/13B/34B/72B/110B | llava_next |
|
| [LLaVA-NeXT](https://huggingface.co/llava-hf) | 7B/8B/13B/34B/72B/110B | llava_next |
|
||||||
| [LLaVA-NeXT-Video](https://huggingface.co/llava-hf) | 7B/34B | llava_next_video |
|
| [LLaVA-NeXT-Video](https://huggingface.co/llava-hf) | 7B/34B | llava_next_video |
|
||||||
| [MiMo](https://huggingface.co/XiaomiMiMo) | 7B | mimo |
|
| [MiMo](https://huggingface.co/XiaomiMiMo) | 7B | mimo |
|
||||||
| [MiniCPM](https://huggingface.co/openbmb) | 0.5B/1B/2B/4B/8B | cpm/cpm3/cpm4 |
|
| [MiniCPM 1-4.1](https://huggingface.co/openbmb) | 0.5B/1B/2B/4B/8B | cpm/cpm3/cpm4 |
|
||||||
| [MiniCPM-o-2.6/MiniCPM-V-2.6](https://huggingface.co/openbmb) | 8B | minicpm_o/minicpm_v |
|
| [MiniCPM-o-2.6/MiniCPM-V-2.6](https://huggingface.co/openbmb) | 8B | minicpm_o/minicpm_v |
|
||||||
| [Ministral/Mistral-Nemo](https://huggingface.co/mistralai) | 8B/12B | ministral |
|
| [Ministral/Mistral-Nemo](https://huggingface.co/mistralai) | 8B/12B | ministral |
|
||||||
| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
|
| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
|
||||||
@ -309,11 +309,11 @@ Choose your path:
|
|||||||
| [Phi-4](https://huggingface.co/microsoft) | 14B | phi4 |
|
| [Phi-4](https://huggingface.co/microsoft) | 14B | phi4 |
|
||||||
| [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral |
|
| [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral |
|
||||||
| [Qwen (1-2.5) (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
|
| [Qwen (1-2.5) (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
|
||||||
| [Qwen3 (MoE/Instruct/Thinking)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/235B | qwen3/qwen3_nothink |
|
| [Qwen3 (MoE/Instruct/Thinking/Next)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/80B/235B | qwen3/qwen3_nothink |
|
||||||
| [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio |
|
| [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio |
|
||||||
| [Qwen2.5-Omni](https://huggingface.co/Qwen) | 3B/7B | qwen2_omni |
|
| [Qwen2.5-Omni](https://huggingface.co/Qwen) | 3B/7B | qwen2_omni |
|
||||||
| [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl |
|
| [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl |
|
||||||
| [Seed Coder](https://huggingface.co/ByteDance-Seed) | 8B | seed_coder |
|
| [Seed (OSS/Coder)](https://huggingface.co/ByteDance-Seed) | 8B/36B | seed_oss/seed_coder |
|
||||||
| [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 |
|
| [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 |
|
||||||
| [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
|
| [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
|
||||||
| [TeleChat2](https://huggingface.co/Tele-AI) | 3B/7B/35B/115B | telechat2 |
|
| [TeleChat2](https://huggingface.co/Tele-AI) | 3B/7B/35B/115B | telechat2 |
|
||||||
|
@ -278,7 +278,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
|
|||||||
| [Gemma 3/Gemma 3n](https://huggingface.co/google) | 270M/1B/4B/6B/8B/12B/27B | gemma3/gemma3n |
|
| [Gemma 3/Gemma 3n](https://huggingface.co/google) | 270M/1B/4B/6B/8B/12B/27B | gemma3/gemma3n |
|
||||||
| [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/zai-org) | 9B/32B | glm4/glmz1 |
|
| [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/zai-org) | 9B/32B | glm4/glmz1 |
|
||||||
| [GLM-4.1V](https://huggingface.co/zai-org) | 9B | glm4v |
|
| [GLM-4.1V](https://huggingface.co/zai-org) | 9B | glm4v |
|
||||||
| [GLM-4.5/GLM-4.5V](https://huggingface.co/zai-org)* | 106B/355B | glm4_moe/glm4v_moe |
|
| [GLM-4.5/GLM-4.5V](https://huggingface.co/zai-org) | 106B/355B | glm4_moe/glm4v_moe |
|
||||||
| [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - |
|
| [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - |
|
||||||
| [GPT-OSS](https://huggingface.co/openai) | 20B/120B | gpt |
|
| [GPT-OSS](https://huggingface.co/openai) | 20B/120B | gpt |
|
||||||
| [Granite 3.0-3.3](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 |
|
| [Granite 3.0-3.3](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 |
|
||||||
@ -298,7 +298,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
|
|||||||
| [LLaVA-NeXT](https://huggingface.co/llava-hf) | 7B/8B/13B/34B/72B/110B | llava_next |
|
| [LLaVA-NeXT](https://huggingface.co/llava-hf) | 7B/8B/13B/34B/72B/110B | llava_next |
|
||||||
| [LLaVA-NeXT-Video](https://huggingface.co/llava-hf) | 7B/34B | llava_next_video |
|
| [LLaVA-NeXT-Video](https://huggingface.co/llava-hf) | 7B/34B | llava_next_video |
|
||||||
| [MiMo](https://huggingface.co/XiaomiMiMo) | 7B | mimo |
|
| [MiMo](https://huggingface.co/XiaomiMiMo) | 7B | mimo |
|
||||||
| [MiniCPM](https://huggingface.co/openbmb) | 0.5B/1B/2B/4B/8B | cpm/cpm3/cpm4 |
|
| [MiniCPM 1-4.1](https://huggingface.co/openbmb) | 0.5B/1B/2B/4B/8B | cpm/cpm3/cpm4 |
|
||||||
| [MiniCPM-o-2.6/MiniCPM-V-2.6](https://huggingface.co/openbmb) | 8B | minicpm_o/minicpm_v |
|
| [MiniCPM-o-2.6/MiniCPM-V-2.6](https://huggingface.co/openbmb) | 8B | minicpm_o/minicpm_v |
|
||||||
| [Ministral/Mistral-Nemo](https://huggingface.co/mistralai) | 8B/12B | ministral |
|
| [Ministral/Mistral-Nemo](https://huggingface.co/mistralai) | 8B/12B | ministral |
|
||||||
| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
|
| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
|
||||||
@ -311,11 +311,11 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
|
|||||||
| [Phi-4](https://huggingface.co/microsoft) | 14B | phi4 |
|
| [Phi-4](https://huggingface.co/microsoft) | 14B | phi4 |
|
||||||
| [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral |
|
| [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral |
|
||||||
| [Qwen (1-2.5) (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
|
| [Qwen (1-2.5) (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
|
||||||
| [Qwen3 (MoE/Instruct/Thinking)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/235B | qwen3/qwen3_nothink |
|
| [Qwen3 (MoE/Instruct/Thinking/Next)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/80B/235B | qwen3/qwen3_nothink |
|
||||||
| [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio |
|
| [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio |
|
||||||
| [Qwen2.5-Omni](https://huggingface.co/Qwen) | 3B/7B | qwen2_omni |
|
| [Qwen2.5-Omni](https://huggingface.co/Qwen) | 3B/7B | qwen2_omni |
|
||||||
| [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl |
|
| [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl |
|
||||||
| [Seed Coder](https://huggingface.co/ByteDance-Seed) | 8B | seed_coder |
|
| [Seed (OSS/Coder)](https://huggingface.co/ByteDance-Seed) | 8B/36B | seed_oss/seed_coder |
|
||||||
| [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 |
|
| [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 |
|
||||||
| [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
|
| [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
|
||||||
| [TeleChat2](https://huggingface.co/Tele-AI) | 3B/7B/35B/115B | telechat2 |
|
| [TeleChat2](https://huggingface.co/Tele-AI) | 3B/7B/35B/115B | telechat2 |
|
||||||
|
Binary file not shown.
Before Width: | Height: | Size: 167 KiB After Width: | Height: | Size: 163 KiB |
Binary file not shown.
Before Width: | Height: | Size: 168 KiB After Width: | Height: | Size: 170 KiB |
@ -1,12 +1,11 @@
|
|||||||
# core deps
|
# core deps
|
||||||
transformers>=4.49.0,<=4.55.0,!=4.52.0
|
transformers>=4.49.0,<=4.56.1,!=4.52.0
|
||||||
datasets>=2.16.0,<=3.6.0
|
datasets>=2.16.0,<=4.0.0
|
||||||
accelerate>=1.3.0,<=1.7.0
|
accelerate>=1.3.0,<=1.10.1
|
||||||
peft>=0.14.0,<=0.15.2
|
peft>=0.14.0,<=0.17.1
|
||||||
trl>=0.8.6,<=0.9.6
|
trl>=0.8.6,<=0.9.6
|
||||||
tokenizers>=0.19.0,<=0.21.1
|
|
||||||
# gui
|
# gui
|
||||||
gradio>=4.38.0,<=5.42.0
|
gradio>=4.38.0,<=5.45.0
|
||||||
matplotlib>=3.7.0
|
matplotlib>=3.7.0
|
||||||
tyro<0.9.0
|
tyro<0.9.0
|
||||||
# ops
|
# ops
|
||||||
|
@ -11,7 +11,7 @@
|
|||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
from abc import abstractmethod
|
from abc import abstractmethod
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
@ -227,9 +227,150 @@ class SharegptDatasetConverter(DatasetConverter):
|
|||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class OpenAIDatasetConverter(DatasetConverter):
    r"""Convert OpenAI-style chat examples into the internal prompt/response format.

    Each example carries a list of messages under ``dataset_attr.messages``. Assistant
    messages may carry a ``tool_calls`` list; those are serialized to JSON and emitted
    as function turns. Consecutive tool (observation) messages are merged into a single
    observation turn.
    """

    def _align_messages(
        self, messages: list[dict[str, Any]], tag_mapping: dict[str, str]
    ) -> list[dict[str, str]]:
        r"""Map raw messages to ``{"role", "content"}`` turns, folding tool calls and tool responses."""
        attr = self.dataset_attr
        response_separator = "\n</tool_response>\n<tool_response>\n"
        aligned_messages: list[dict[str, str]] = []
        tool_responses: list[str] = []

        def flush_tool_responses() -> None:
            # Merge every pending tool response into one observation turn.
            aligned_messages.append(
                {
                    "role": Role.OBSERVATION.value,
                    "content": response_separator.join(tool_responses),
                }
            )
            tool_responses.clear()

        for message in messages:
            role = message[attr.role_tag]
            content = message[attr.content_tag]

            # ``tool_calls`` may be missing, empty, or None (e.g. when a loader pads
            # absent fields), so test truthiness instead of calling ``len`` on it.
            if role in (attr.assistant_tag, attr.function_tag) and message.get("tool_calls"):
                tool_calls_list = [tool["function"] for tool in message["tool_calls"]]
                content = json.dumps(tool_calls_list, ensure_ascii=False)
                role = attr.function_tag

            if role == attr.observation_tag:
                tool_responses.append(content)
                continue

            if tool_responses:
                flush_tool_responses()

            aligned_messages.append({"role": tag_mapping[role], "content": content})

        if tool_responses:
            if attr.ranking:
                # In pairwise data the prompt may legitimately end with a tool result
                # that the chosen/rejected answers respond to.
                flush_tool_responses()
            else:
                # There is no assistant turn left to supervise; keep the previous
                # truncating behavior, but no longer silently.
                logger.warning_rank0("Dropping trailing tool responses that have no following assistant turn.")

        return aligned_messages

    def __call__(self, example: dict[str, Any]) -> dict[str, Any]:
        r"""Convert a single example.

        Args:
            example: raw dataset row; read through the column/tag names in ``self.dataset_attr``.

        Returns:
            A dict with ``_prompt``, ``_response``, ``_system``, ``_tools``, ``_images``,
            ``_videos`` and ``_audios``. Invalid examples yield empty ``_prompt``/``_response``.
        """
        attr = self.dataset_attr
        tag_mapping = {
            attr.user_tag: Role.USER.value,
            attr.assistant_tag: Role.ASSISTANT.value,
            attr.observation_tag: Role.OBSERVATION.value,
            attr.function_tag: Role.FUNCTION.value,
            attr.system_tag: Role.SYSTEM.value,
        }

        messages = example[attr.messages]
        if attr.system_tag and len(messages) != 0 and messages[0][attr.role_tag] == attr.system_tag:
            # A leading system message takes precedence over the separate system column.
            system = messages[0][attr.content_tag]
            messages = messages[1:]
        else:
            system = example.get(attr.system, "") if attr.system else ""

        aligned_messages = self._align_messages(messages, tag_mapping)

        odd_tags = (Role.USER.value, Role.OBSERVATION.value)
        even_tags = (Role.ASSISTANT.value, Role.FUNCTION.value)
        accept_tags = (odd_tags, even_tags)
        broken_data = False
        for turn_idx, message in enumerate(aligned_messages):
            if message["role"] not in accept_tags[turn_idx % 2]:
                logger.warning_rank0(f"Invalid role tag in {messages}.")
                broken_data = True
                break

        # Ranking prompts end on a user/observation turn (odd count); others end on a reply (even count).
        expected_parity = 1 if attr.ranking else 0
        if len(aligned_messages) % 2 != expected_parity:
            logger.warning_rank0(f"Invalid message count in {messages}.")
            broken_data = True

        prompt, response = [], []
        if broken_data:
            pass
        elif attr.kto_tag and isinstance(example[attr.kto_tag], bool):  # kto example
            prompt = aligned_messages[:-1]
            response = aligned_messages[-1:]
            if example[attr.kto_tag]:
                response = response + [{"role": Role.ASSISTANT.value, "content": ""}]
            else:
                response = [{"role": Role.ASSISTANT.value, "content": ""}] + response
        elif (
            attr.ranking
            and isinstance(example[attr.chosen], dict)
            and isinstance(example[attr.rejected], dict)
        ):  # pairwise example
            chosen = example[attr.chosen]
            rejected = example[attr.rejected]
            # Validate the mapped roles: ``even_tags`` holds internal role values, not raw dataset tags.
            chosen_role = tag_mapping.get(chosen[attr.role_tag])
            rejected_role = tag_mapping.get(rejected[attr.role_tag])
            if chosen_role not in even_tags or rejected_role not in even_tags:
                logger.warning_rank0(f"Invalid role tag in {[chosen, rejected]}.")
                broken_data = True
            else:
                prompt = aligned_messages
                response = [
                    {"role": chosen_role, "content": chosen[attr.content_tag]},
                    {"role": rejected_role, "content": rejected[attr.content_tag]},
                ]
        else:  # normal example
            prompt = aligned_messages[:-1]
            response = aligned_messages[-1:]

        if broken_data:
            logger.warning_rank0("Skipping this abnormal example.")

        tools = example.get(attr.tools, "") if attr.tools else ""
        if isinstance(tools, (dict, list)):
            tools = json.dumps(tools, ensure_ascii=False)

        short_system_prompt = "detailed thinking off"
        if tools:
            # NOTE(review): with tools only a newline is appended; presumably the tool
            # formatter supplies the thinking directive itself - confirm.
            if system:
                system += "\n"
        elif not system:
            system = short_system_prompt
        elif "detailed thinking on" not in system and "detailed thinking off" not in system:
            system += "\n" + short_system_prompt

        output = {
            "_prompt": prompt,
            "_response": response,
            "_system": system,
            "_tools": tools,
            "_images": self._find_medias(example[attr.images]) if attr.images else None,
            "_videos": self._find_medias(example[attr.videos]) if attr.videos else None,
            "_audios": self._find_medias(example[attr.audios]) if attr.audios else None,
        }
        return output
|
||||||
|
|
||||||
|
|
||||||
DATASET_CONVERTERS = {
|
DATASET_CONVERTERS = {
|
||||||
"alpaca": AlpacaDatasetConverter,
|
"alpaca": AlpacaDatasetConverter,
|
||||||
"sharegpt": SharegptDatasetConverter,
|
"sharegpt": SharegptDatasetConverter,
|
||||||
|
"openai": OpenAIDatasetConverter,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -679,6 +679,23 @@ register_template(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# "bailing_v2" chat template: every turn is written as <role>NAME</role>...<|role_end|>,
# and user/observation turns finish by opening the assistant turn.
register_template(
    name="bailing_v2",
    format_system=StringFormatter(slots=["<role>SYSTEM</role>{{content}}<|role_end|>"]),
    format_user=StringFormatter(slots=["<role>HUMAN</role>{{content}}<|role_end|><role>ASSISTANT</role>"]),
    format_assistant=StringFormatter(slots=["{{content}}<|role_end|>"]),
    format_function=FunctionFormatter(slots=["{{content}}<|role_end|>"], tool_format="ling"),
    format_observation=StringFormatter(
        slots=[
            "<role>OBSERVATION</role>\n<tool_response>\n{{content}}\n</tool_response><|role_end|>"
            "<role>ASSISTANT</role>"
        ]
    ),
    format_tools=ToolFormatter(tool_format="ling"),
    stop_words=["<|endoftext|>"],
    efficient_eos=True,
)
|
||||||
|
|
||||||
|
|
||||||
register_template(
|
register_template(
|
||||||
name="belle",
|
name="belle",
|
||||||
format_user=StringFormatter(slots=["Human: {{content}}\n\nBelle: "]),
|
format_user=StringFormatter(slots=["Human: {{content}}\n\nBelle: "]),
|
||||||
@ -900,6 +917,18 @@ register_template(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# copied from chatml template; turns are separated by "<|im_end|>\n\n" and the
# default system prompt sets think_mode=True
register_template(
    name="ernie",
    format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n\n"]),
    format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n\n<|im_start|>assistant\n"]),
    format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n\n"]),
    format_observation=StringFormatter(slots=["<|im_start|>tool\n{{content}}<|im_end|>\n\n<|im_start|>assistant\n"]),
    default_system="<global_setting>\nthink_mode=True\n</global_setting>",
    stop_words=["<|im_end|>"],
)
|
||||||
|
|
||||||
|
|
||||||
register_template(
|
register_template(
|
||||||
name="exaone",
|
name="exaone",
|
||||||
format_user=StringFormatter(slots=["[|user|]{{content}}\n[|assistant|]"]),
|
format_user=StringFormatter(slots=["[|user|]{{content}}\n[|assistant|]"]),
|
||||||
|
@ -78,6 +78,14 @@ SEED_TOOL_PROMPT = (
|
|||||||
"lines</parameter>\n</function>\n</seed:tool_call>\n"
|
"lines</parameter>\n</function>\n</seed:tool_call>\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# System-prompt section that lists the available tools for the "ling" tool format.
# ``{tool_text}`` is filled via ``str.format``; the doubled braces render as literal ``{`` / ``}``.
LING_TOOL_PROMPT = (
    "# Tools\n\n"
    "You may call one or more functions to assist with the user query.\n\n"
    "You are provided with function signatures within <tools></tools> XML tags:\n"
    "<tools>{tool_text}\n</tools>\n\n"
    "For each function call, return a json object with function name and arguments within "
    "<tool_call></tool_call> XML tags:\n"
    '<tool_call>\n{{"name": <function-name>, "arguments": <args-json-object>}}\n</tool_call>'
)
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class ToolUtils(ABC):
|
class ToolUtils(ABC):
|
||||||
@ -406,6 +414,20 @@ class SeedToolUtils(ToolUtils):
|
|||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
class LingToolUtils(QwenToolUtils):
    r"""Ling v2 tool using template."""

    @override
    @staticmethod
    def tool_formatter(tools: list[dict[str, Any]]) -> str:
        r"""Render the tool list into the Ling tool prompt.

        Each tool is emitted as one JSON line; a tool whose ``type`` is not ``"function"``
        is wrapped as ``{"type": "function", "function": tool}``. The prompt is followed by
        a newline and the ``detailed thinking off`` directive.
        """
        serialized_tools = []
        for spec in tools:
            if spec.get("type") != "function":
                # Rebinds the local name only; the caller's dict is left untouched.
                spec = {"type": "function", "function": spec}

            serialized_tools.append("\n" + json.dumps(spec, ensure_ascii=False))

        tool_section = LING_TOOL_PROMPT.format(tool_text="".join(serialized_tools))
        return tool_section + "\n" + "detailed thinking off"
|
||||||
|
|
||||||
|
|
||||||
TOOLS = {
|
TOOLS = {
|
||||||
"default": DefaultToolUtils(),
|
"default": DefaultToolUtils(),
|
||||||
"glm4": GLM4ToolUtils(),
|
"glm4": GLM4ToolUtils(),
|
||||||
@ -414,6 +436,7 @@ TOOLS = {
|
|||||||
"qwen": QwenToolUtils(),
|
"qwen": QwenToolUtils(),
|
||||||
"glm4_moe": GLM4MOEToolUtils(),
|
"glm4_moe": GLM4MOEToolUtils(),
|
||||||
"seed_oss": SeedToolUtils(),
|
"seed_oss": SeedToolUtils(),
|
||||||
|
"ling": LingToolUtils(),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -601,6 +601,17 @@ register_model_group(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ERNIE 4.5 thinking checkpoint, served with the "ernie" chat template.
register_model_group(
    template="ernie",
    models={
        "ERNIE-4.5-21B-A3B-Thinking": {
            DownloadSource.DEFAULT: "baidu/ERNIE-4.5-21B-A3B-Thinking",
            DownloadSource.MODELSCOPE: "PaddlePaddle/ERNIE-4.5-21B-A3B-Thinking",
        },
    },
)
|
||||||
|
|
||||||
|
|
||||||
register_model_group(
|
register_model_group(
|
||||||
models={
|
models={
|
||||||
"EXAONE-3.0-7.8B-Instruct": {
|
"EXAONE-3.0-7.8B-Instruct": {
|
||||||
@ -1783,6 +1794,10 @@ register_model_group(
|
|||||||
DownloadSource.DEFAULT: "openbmb/MiniCPM4-8B",
|
DownloadSource.DEFAULT: "openbmb/MiniCPM4-8B",
|
||||||
DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM4-8B",
|
DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM4-8B",
|
||||||
},
|
},
|
||||||
|
"MiniCPM4.1-8B-Chat": {
|
||||||
|
DownloadSource.DEFAULT: "openbmb/MiniCPM4.1-8B",
|
||||||
|
DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM4.1-8B",
|
||||||
|
},
|
||||||
},
|
},
|
||||||
template="cpm4",
|
template="cpm4",
|
||||||
)
|
)
|
||||||
@ -1790,7 +1805,7 @@ register_model_group(
|
|||||||
|
|
||||||
register_model_group(
|
register_model_group(
|
||||||
models={
|
models={
|
||||||
"MiniCPM-o-2_6": {
|
"MiniCPM-o-2.6": {
|
||||||
DownloadSource.DEFAULT: "openbmb/MiniCPM-o-2_6",
|
DownloadSource.DEFAULT: "openbmb/MiniCPM-o-2_6",
|
||||||
DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM-o-2_6",
|
DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM-o-2_6",
|
||||||
},
|
},
|
||||||
@ -1802,7 +1817,7 @@ register_model_group(
|
|||||||
|
|
||||||
register_model_group(
|
register_model_group(
|
||||||
models={
|
models={
|
||||||
"MiniCPM-V-2_6": {
|
"MiniCPM-V-2.6": {
|
||||||
DownloadSource.DEFAULT: "openbmb/MiniCPM-V-2_6",
|
DownloadSource.DEFAULT: "openbmb/MiniCPM-V-2_6",
|
||||||
DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM-V-2_6",
|
DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM-V-2_6",
|
||||||
},
|
},
|
||||||
@ -1826,7 +1841,7 @@ register_model_group(
|
|||||||
|
|
||||||
register_model_group(
|
register_model_group(
|
||||||
models={
|
models={
|
||||||
"MiniCPM-V-4_5": {
|
"MiniCPM-V-4.5": {
|
||||||
DownloadSource.DEFAULT: "openbmb/MiniCPM-V-4_5",
|
DownloadSource.DEFAULT: "openbmb/MiniCPM-V-4_5",
|
||||||
DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM-V-4_5",
|
DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM-V-4_5",
|
||||||
},
|
},
|
||||||
@ -1944,6 +1959,37 @@ register_model_group(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# MobileLLM-R1 family: the base checkpoints use a "-base" repo suffix, the instruct
# checkpoints use the bare repo name; all entries share the llama3 template.
register_model_group(
    template="llama3",
    models={
        "MobileLLM-R1-140M-Base": {
            DownloadSource.DEFAULT: "facebook/MobileLLM-R1-140M-base",
            DownloadSource.MODELSCOPE: "facebook/MobileLLM-R1-140M-base",
        },
        "MobileLLM-R1-360M-Base": {
            DownloadSource.DEFAULT: "facebook/MobileLLM-R1-360M-base",
            DownloadSource.MODELSCOPE: "facebook/MobileLLM-R1-360M-base",
        },
        "MobileLLM-R1-950M-Base": {
            DownloadSource.DEFAULT: "facebook/MobileLLM-R1-950M-base",
            DownloadSource.MODELSCOPE: "facebook/MobileLLM-R1-950M-base",
        },
        "MobileLLM-R1-140M-Instruct": {
            DownloadSource.DEFAULT: "facebook/MobileLLM-R1-140M",
            DownloadSource.MODELSCOPE: "facebook/MobileLLM-R1-140M",
        },
        "MobileLLM-R1-360M-Instruct": {
            DownloadSource.DEFAULT: "facebook/MobileLLM-R1-360M",
            DownloadSource.MODELSCOPE: "facebook/MobileLLM-R1-360M",
        },
        "MobileLLM-R1-950M-Instruct": {
            DownloadSource.DEFAULT: "facebook/MobileLLM-R1-950M",
            DownloadSource.MODELSCOPE: "facebook/MobileLLM-R1-950M",
        },
    },
)
|
||||||
|
|
||||||
|
|
||||||
register_model_group(
|
register_model_group(
|
||||||
models={
|
models={
|
||||||
"Moonlight-16B-A3B": {
|
"Moonlight-16B-A3B": {
|
||||||
@ -2912,6 +2958,10 @@ register_model_group(
|
|||||||
DownloadSource.DEFAULT: "Qwen/Qwen3-235B-A22B-GPTQ-Int4",
|
DownloadSource.DEFAULT: "Qwen/Qwen3-235B-A22B-GPTQ-Int4",
|
||||||
DownloadSource.MODELSCOPE: "Qwen/Qwen3-235B-A22B-GPTQ-Int4",
|
DownloadSource.MODELSCOPE: "Qwen/Qwen3-235B-A22B-GPTQ-Int4",
|
||||||
},
|
},
|
||||||
|
"Qwen/Qwen3-Next-80B-A3B-Thinking": {
|
||||||
|
DownloadSource.DEFAULT: "Qwen/Qwen3-Next-80B-A3B-Thinking",
|
||||||
|
DownloadSource.MODELSCOPE: "Qwen/Qwen3-Next-80B-A3B-Thinking",
|
||||||
|
},
|
||||||
},
|
},
|
||||||
template="qwen3",
|
template="qwen3",
|
||||||
)
|
)
|
||||||
@ -2931,6 +2981,10 @@ register_model_group(
|
|||||||
DownloadSource.DEFAULT: "Qwen/Qwen3-235B-A22B-Instruct-2507",
|
DownloadSource.DEFAULT: "Qwen/Qwen3-235B-A22B-Instruct-2507",
|
||||||
DownloadSource.MODELSCOPE: "Qwen/Qwen3-235B-A22B-Instruct-2507",
|
DownloadSource.MODELSCOPE: "Qwen/Qwen3-235B-A22B-Instruct-2507",
|
||||||
},
|
},
|
||||||
|
"Qwen3-Next-80B-A3B-Instruct": {
|
||||||
|
DownloadSource.DEFAULT: "Qwen/Qwen3-Next-80B-A3B-Instruct",
|
||||||
|
DownloadSource.MODELSCOPE: "Qwen/Qwen3-Next-80B-A3B-Instruct",
|
||||||
|
},
|
||||||
},
|
},
|
||||||
template="qwen3_nothink",
|
template="qwen3_nothink",
|
||||||
)
|
)
|
||||||
|
@ -94,10 +94,10 @@ def check_version(requirement: str, mandatory: bool = False) -> None:
|
|||||||
|
|
||||||
def check_dependencies() -> None:
|
def check_dependencies() -> None:
|
||||||
r"""Check the version of the required packages."""
|
r"""Check the version of the required packages."""
|
||||||
check_version("transformers>=4.49.0,<=4.55.0")
|
check_version("transformers>=4.49.0,<=4.56.1")
|
||||||
check_version("datasets>=2.16.0,<=3.6.0")
|
check_version("datasets>=2.16.0,<=4.0.0")
|
||||||
check_version("accelerate>=1.3.0,<=1.7.0")
|
check_version("accelerate>=1.3.0,<=1.10.1")
|
||||||
check_version("peft>=0.14.0,<=0.15.2")
|
check_version("peft>=0.14.0,<=0.17.1")
|
||||||
check_version("trl>=0.8.6,<=0.9.6")
|
check_version("trl>=0.8.6,<=0.9.6")
|
||||||
|
|
||||||
|
|
||||||
|
@ -58,6 +58,10 @@ def is_apollo_available():
|
|||||||
return _is_package_available("apollo_torch")
|
return _is_package_available("apollo_torch")
|
||||||
|
|
||||||
|
|
||||||
|
def is_jieba_available():
    r"""Return the result of ``_is_package_available`` for the ``jieba`` package."""
    jieba_found = _is_package_available("jieba")
    return jieba_found
|
||||||
|
|
||||||
|
|
||||||
def is_gradio_available():
|
def is_gradio_available():
|
||||||
return _is_package_available("gradio")
|
return _is_package_available("gradio")
|
||||||
|
|
||||||
|
@ -211,9 +211,23 @@ def patch_valuehead_model(model: "AutoModelForCausalLMWithValueHead") -> None:
|
|||||||
if isinstance(self.pretrained_model, PeftModel):
|
if isinstance(self.pretrained_model, PeftModel):
|
||||||
self.pretrained_model.create_or_update_model_card(output_dir)
|
self.pretrained_model.create_or_update_model_card(output_dir)
|
||||||
|
|
||||||
|
def get_rope_index_func(self: "AutoModelForCausalLMWithValueHead"):
    r"""Return the wrapped model's ``get_rope_index`` attribute, or ``None`` if it has none.

    A ``PeftModel`` wrapper is unwrapped through ``base_model.model`` first. The attribute
    is looked up on the resulting model, then on its ``.model`` attribute.
    """
    wrapped = self.pretrained_model
    base_model = wrapped.base_model.model if isinstance(wrapped, PeftModel) else wrapped

    if not base_model:
        return None

    if hasattr(base_model, "get_rope_index"):
        return base_model.get_rope_index

    if hasattr(base_model, "model") and hasattr(base_model.model, "get_rope_index"):
        return base_model.model.get_rope_index

    return None
|
||||||
|
|
||||||
ignore_modules = [name for name, _ in model.named_parameters() if "pretrained_model" in name]
|
ignore_modules = [name for name, _ in model.named_parameters() if "pretrained_model" in name]
|
||||||
setattr(model, "_keys_to_ignore_on_save", ignore_modules)
|
setattr(model, "_keys_to_ignore_on_save", ignore_modules)
|
||||||
setattr(model, "tie_weights", MethodType(tie_weights, model))
|
setattr(model, "tie_weights", MethodType(tie_weights, model))
|
||||||
setattr(model, "get_input_embeddings", MethodType(get_input_embeddings, model))
|
setattr(model, "get_input_embeddings", MethodType(get_input_embeddings, model))
|
||||||
setattr(model, "get_output_embeddings", MethodType(get_output_embeddings, model))
|
setattr(model, "get_output_embeddings", MethodType(get_output_embeddings, model))
|
||||||
|
setattr(model, "get_rope_index", get_rope_index_func(model))
|
||||||
setattr(model, "create_or_update_model_card", MethodType(create_or_update_model_card, model))
|
setattr(model, "create_or_update_model_card", MethodType(create_or_update_model_card, model))
|
||||||
|
@ -21,11 +21,11 @@ from typing import TYPE_CHECKING, Optional
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
from transformers.utils import is_jieba_available, is_nltk_available
|
from transformers.utils import is_nltk_available
|
||||||
|
|
||||||
from ...extras.constants import IGNORE_INDEX
|
from ...extras.constants import IGNORE_INDEX
|
||||||
from ...extras.misc import numpify
|
from ...extras.misc import numpify
|
||||||
from ...extras.packages import is_rouge_available
|
from ...extras.packages import is_jieba_available, is_rouge_available
|
||||||
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
|
@ -34,31 +34,36 @@ LOCALES = {
|
|||||||
"en": {
|
"en": {
|
||||||
"value": (
|
"value": (
|
||||||
"<h3><center>Visit <a href='https://github.com/hiyouga/LLaMA-Factory' target='_blank'>"
|
"<h3><center>Visit <a href='https://github.com/hiyouga/LLaMA-Factory' target='_blank'>"
|
||||||
"GitHub Page</a></center></h3>"
|
"GitHub Page</a> <a href='https://llamafactory.readthedocs.io/en/latest/' target='_blank'>"
|
||||||
|
"Documentation</a></center></h3>"
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
"ru": {
|
"ru": {
|
||||||
"value": (
|
"value": (
|
||||||
"<h3><center>Посетить <a href='https://github.com/hiyouga/LLaMA-Factory' target='_blank'>"
|
"<h3><center>Посетить <a href='https://github.com/hiyouga/LLaMA-Factory' target='_blank'>"
|
||||||
"страницу GitHub</a></center></h3>"
|
"страницу GitHub</a> <a href='https://llamafactory.readthedocs.io/en/latest/' target='_blank'>"
|
||||||
|
"Документацию</a></center></h3>"
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
"zh": {
|
"zh": {
|
||||||
"value": (
|
"value": (
|
||||||
"<h3><center>访问 <a href='https://github.com/hiyouga/LLaMA-Factory' target='_blank'>"
|
"<h3><center>访问 <a href='https://github.com/hiyouga/LLaMA-Factory' target='_blank'>"
|
||||||
"GitHub 主页</a></center></h3>"
|
"GitHub 主页</a> <a href='https://llamafactory.readthedocs.io/zh-cn/latest/' target='_blank'>"
|
||||||
|
"官方文档</a></center></h3>"
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
"ko": {
|
"ko": {
|
||||||
"value": (
|
"value": (
|
||||||
"<h3><center><a href='https://github.com/hiyouga/LLaMA-Factory' target='_blank'>"
|
"<h3><center><a href='https://github.com/hiyouga/LLaMA-Factory' target='_blank'>"
|
||||||
"GitHub 페이지</a>를 방문하세요.</center></h3>"
|
"GitHub 페이지</a> <a href='https://llamafactory.readthedocs.io/en/latest/' target='_blank'>"
|
||||||
|
"공식 문서</a>를 방문하세요.</center></h3>"
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
"ja": {
|
"ja": {
|
||||||
"value": (
|
"value": (
|
||||||
"<h3><center><a href='https://github.com/hiyouga/LLaMA-Factory' target='_blank'>"
|
"<h3><center><a href='https://github.com/hiyouga/LLaMA-Factory' target='_blank'>"
|
||||||
"GitHub ページ</a>にアクセスする</center></h3>"
|
"GitHub ページ</a> <a href='https://llamafactory.readthedocs.io/en/latest/' target='_blank'>"
|
||||||
|
"ドキュメント</a>にアクセスする</center></h3>"
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
Loading…
x
Reference in New Issue
Block a user