Mirror of https://github.com/hiyouga/LLaMA-Factory.git (synced 2025-08-19 12:12:48 +08:00)

Commit 44beb76cdf: [assets] update wechat (#8962)
Parent commit: 842595698b
README.md

@@ -10,7 +10,6 @@
 
 [](https://twitter.com/llamafactory_ai)
 [](https://discord.gg/rKfvV9r9FK)
-[](https://gitcode.com/zhengyaowei/LLaMA-Factory)
 
 [](https://colab.research.google.com/drive/1eRTPn37ltBbYsISy9Aw2NuI2Aq5CQrD9?usp=sharing)
 [](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory)
@@ -101,7 +100,7 @@ Choose your path:
 
 ## Blogs
 
-- [Fine-tune GPT-OSS for Role-Playing using LLaMA-Factory](https://docs.llamafactory.com.cn/docs/documents/best-practice/gptoss/?utm_source=LLaMA-Factory) (Chinese)
+- [Fine-tune GPT-OSS for Role-Playing using LLaMA-Factory](https://docs.llamafactory.com.cn/docs/documents/best-practice/gptroleplay/?utm_source=LLaMA-Factory) (Chinese)
 - [Fine-tune Llama3.1-70B for Medical Diagnosis using LLaMA-Factory](https://docs.alayanew.com/docs/documents/bestPractice/bigModel/llama70B/?utm_source=LLaMA-Factory) (Chinese)
 - [A One-Stop Code-Free Model Reinforcement Learning and Deployment Platform based on LLaMA-Factory and EasyR1](https://aws.amazon.com/cn/blogs/china/building-llm-model-hub-based-on-llamafactory-and-easyr1/) (Chinese)
 - [How Apoidea Group enhances visual information extraction from banking documents with multimodal models using LLaMA-Factory on Amazon SageMaker HyperPod](https://aws.amazon.com/cn/blogs/machine-learning/how-apoidea-group-enhances-visual-information-extraction-from-banking-documents-with-multimodal-models-using-llama-factory-on-amazon-sagemaker-hyperpod/) (English)
@@ -266,7 +265,7 @@
 | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
 | [Falcon-H1](https://huggingface.co/tiiuae) | 0.5B/1.5B/3B/7B/34B | falcon_h1 |
 | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma/gemma2 |
-| [Gemma 3/Gemma 3n](https://huggingface.co/google) | 1B/4B/6B/8B/12B/27B | gemma3/gemma3n |
+| [Gemma 3/Gemma 3n](https://huggingface.co/google) | 270M/1B/4B/6B/8B/12B/27B | gemma3/gemma3n |
 | [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/zai-org) | 9B/32B | glm4/glmz1 |
 | [GLM-4.1V](https://huggingface.co/zai-org) | 9B | glm4v |
 | [GLM-4.5/GLM-4.5V](https://huggingface.co/zai-org)* | 106B/355B | glm4_moe/glm4v_moe |
README_zh.md

@@ -10,7 +10,6 @@
 
 [](https://twitter.com/llamafactory_ai)
 [](https://discord.gg/rKfvV9r9FK)
-[](https://gitcode.com/zhengyaowei/LLaMA-Factory)
 
 [](https://colab.research.google.com/drive/1d5KQtbemerlSDSxZIfAaWXhKr30QypiK?usp=sharing)
 [](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory)
@@ -103,7 +102,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
 
 ## 官方博客
 
-- [使用 LLaMA-Factory 构建 GPT-OSS 角色扮演模型](https://docs.llamafactory.com.cn/docs/documents/best-practice/gptoss/?utm_source=LLaMA-Factory)(中文)
+- [使用 LLaMA-Factory 构建 GPT-OSS 角色扮演模型](https://docs.llamafactory.com.cn/docs/documents/best-practice/gptroleplay/?utm_source=LLaMA-Factory)(中文)
 - [使用 LLaMA-Factory 微调 Llama3.1-70B 医学诊断模型](https://docs.alayanew.com/docs/documents/bestPractice/bigModel/llama70B/?utm_source=LLaMA-Factory)(中文)
 - [基于 LLaMA-Factory 和 EasyR1 打造一站式无代码大模型强化学习和部署平台 LLM Model Hub](https://aws.amazon.com/cn/blogs/china/building-llm-model-hub-based-on-llamafactory-and-easyr1/)(中文)
 - [通过亚马逊 SageMaker HyperPod 上的 LLaMA-Factory 增强多模态模型银行文档的视觉信息提取](https://aws.amazon.com/cn/blogs/machine-learning/how-apoidea-group-enhances-visual-information-extraction-from-banking-documents-with-multimodal-models-using-llama-factory-on-amazon-sagemaker-hyperpod/)(英文)
@@ -268,7 +267,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
 | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
 | [Falcon-H1](https://huggingface.co/tiiuae) | 0.5B/1.5B/3B/7B/34B | falcon_h1 |
 | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma/gemma2 |
-| [Gemma 3/Gemma 3n](https://huggingface.co/google) | 1B/4B/6B/8B/12B/27B | gemma3/gemma3n |
+| [Gemma 3/Gemma 3n](https://huggingface.co/google) | 270M/1B/4B/6B/8B/12B/27B | gemma3/gemma3n |
 | [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/zai-org) | 9B/32B | glm4/glmz1 |
 | [GLM-4.1V](https://huggingface.co/zai-org) | 9B | glm4v |
 | [GLM-4.5/GLM-4.5V](https://huggingface.co/zai-org)* | 106B/355B | glm4_moe/glm4v_moe |
Binary files not shown (WeChat QR-code images):
  Before: 169 KiB  →  After: 167 KiB
  Before: 170 KiB  →  After: 169 KiB
pyproject.toml

@@ -4,11 +4,11 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "llamafactory"
+requires-python = ">=3.9.0"
 dynamic = [
     "version",
     "dependencies",
     "optional-dependencies",
-    "requires-python",
     "scripts",
     "authors",
     "description",
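The hunk above stops declaring requires-python under dynamic and pins it statically to >=3.9.0, which is what PEP 621 consumers expect for a field whose value never changes at build time. A minimal sketch for confirming the field lands in the installed metadata, assuming llamafactory is installed in the current environment (this check is not part of the commit):

from importlib.metadata import metadata

# Read the core metadata of the installed distribution.
meta = metadata("llamafactory")
print(meta["Requires-Python"])  # expected: >=3.9.0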
requirements.txt

@@ -6,7 +6,7 @@ peft>=0.14.0,<=0.15.2
 trl>=0.8.6,<=0.9.6
 tokenizers>=0.19.0,<=0.21.1
 # gui
-gradio>=4.38.0,<=5.31.0
+gradio>=4.38.0,<=5.42.0
 matplotlib>=3.7.0
 tyro<0.9.0
 # ops
src/llamafactory/extras/constants.py

@@ -776,6 +776,10 @@ register_model_group(
 
 register_model_group(
     models={
+        "Gemma-3-270M": {
+            DownloadSource.DEFAULT: "google/gemma-3-270m",
+            DownloadSource.MODELSCOPE: "LLM-Research/gemma-3-270m",
+        },
         "Gemma-3-4B": {
             DownloadSource.DEFAULT: "google/gemma-3-4b-pt",
             DownloadSource.MODELSCOPE: "LLM-Research/gemma-3-4b-pt",
@@ -788,6 +792,10 @@ register_model_group(
             DownloadSource.DEFAULT: "google/gemma-3-27b-pt",
             DownloadSource.MODELSCOPE: "LLM-Research/gemma-3-27b-pt",
         },
+        "Gemma-3-270M-Instruct": {
+            DownloadSource.DEFAULT: "google/gemma-3-270m-it",
+            DownloadSource.MODELSCOPE: "LLM-Research/gemma-3-270m-it",
+        },
         "Gemma-3-4B-Instruct": {
             DownloadSource.DEFAULT: "google/gemma-3-4b-it",
             DownloadSource.MODELSCOPE: "LLM-Research/gemma-3-4b-it",
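The two hunks above register the new Gemma-3-270M base and instruct checkpoints with both download hubs. A minimal sketch of how a registry of this shape resolves a model name to a repo id; the DownloadSource values and the resolve() helper are illustrative stand-ins, not the repository's actual implementation:

from enum import Enum

class DownloadSource(str, Enum):
    DEFAULT = "huggingface"
    MODELSCOPE = "modelscope"

# name -> {source -> repo id}, mirroring the structure in the diff
REGISTRY: dict[str, dict[DownloadSource, str]] = {}

def register_model_group(models: dict[str, dict[DownloadSource, str]], **kwargs) -> None:
    REGISTRY.update(models)

register_model_group(
    models={
        "Gemma-3-270M": {
            DownloadSource.DEFAULT: "google/gemma-3-270m",
            DownloadSource.MODELSCOPE: "LLM-Research/gemma-3-270m",
        },
    },
)

def resolve(name: str, source: DownloadSource = DownloadSource.DEFAULT) -> str:
    entry = REGISTRY[name]
    return entry.get(source, entry[DownloadSource.DEFAULT])  # fall back to the default hub

print(resolve("Gemma-3-270M", DownloadSource.MODELSCOPE))  # LLM-Research/gemma-3-270m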
@@ -1669,8 +1677,8 @@ register_model_group(
         },
         "MiMo-VL-7B-RL-2508": {
             DownloadSource.DEFAULT: "XiaomiMiMo/MiMo-VL-7B-RL-2508",
-            DownloadSource.MODELSCOPE: "XiaomiMiMo/MiMo-VL-7B-RL-2508"
-        }
+            DownloadSource.MODELSCOPE: "XiaomiMiMo/MiMo-VL-7B-RL-2508",
+        },
     },
     template="mimo_vl",
     multimodal=True,
@@ -1685,7 +1693,7 @@ register_model_group(
         },
         "MiMo-VL-7B-SFT-2508": {
             DownloadSource.DEFAULT: "XiaomiMiMo/MiMo-VL-7B-SFT-2508",
-            DownloadSource.MODELSCOPE: "XiaomiMiMo/MiMo-VL-7B-SFT-2508"
+            DownloadSource.MODELSCOPE: "XiaomiMiMo/MiMo-VL-7B-SFT-2508",
         },
     },
     template="qwen2_vl",
src/llamafactory/hparams/parser.py

@@ -32,6 +32,7 @@ from transformers.utils import is_torch_bf16_gpu_available, is_torch_npu_availab
 from ..extras import logging
 from ..extras.constants import CHECKPOINT_NAMES, EngineName
 from ..extras.misc import check_dependencies, check_version, get_current_device, is_env_enabled
+from ..extras.packages import is_transformers_version_greater_than
 from .data_args import DataArguments
 from .evaluation_args import EvaluationArguments
 from .finetuning_args import FinetuningArguments
@@ -304,6 +305,9 @@ def get_train_args(args: Optional[Union[dict[str, Any], list[str]]] = None) -> _
     if model_args.use_unsloth and is_deepspeed_zero3_enabled():
         raise ValueError("Unsloth is incompatible with DeepSpeed ZeRO-3.")
 
+    if data_args.neat_packing and is_transformers_version_greater_than("4.53.0"):
+        raise ValueError("Neat packing is incompatible with transformers>=4.53.0.")
+
     _set_env_vars()
     _verify_model_args(model_args, data_args, finetuning_args)
     _check_extra_dependencies(model_args, finetuning_args, training_args)
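The new guard refuses neat_packing on transformers 4.53.0 or newer. A sketch of what a version gate like is_transformers_version_greater_than typically looks like, assuming packaging is available (transformers depends on it); the body below is illustrative, not copied from extras/packages.py:

import transformers
from packaging import version

def is_transformers_version_greater_than(content: str) -> bool:
    # Inclusive comparison, so 4.53.0 itself also trips the guard above.
    return version.parse(transformers.__version__) >= version.parse(content)

print(is_transformers_version_greater_than("4.53.0"))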
src/llamafactory/model/adapter.py

@@ -16,11 +16,10 @@ import re
 from typing import TYPE_CHECKING
 
 import torch
-from peft import LoraConfig, LoraModel, OFTConfig, OFTModel, PeftModel, TaskType, get_peft_model
+from peft import LoraConfig, LoraModel, OFTConfig, PeftModel, TaskType, get_peft_model
 from transformers.integrations import is_deepspeed_zero3_enabled
 
 from ..extras import logging
-from ..extras.misc import check_version
 from .model_utils.misc import find_all_linear_modules, find_expanded_modules
 from .model_utils.quantization import QuantizationMethod
 from .model_utils.unsloth import get_unsloth_peft_model, load_unsloth_peft_model
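OFTModel and check_version drop out of the import list here, presumably because the setup below no longer references them. A minimal LoRA sketch using the surviving imports, assuming a tiny test checkpoint (the model id and hyperparameters are placeholders, not LLaMA-Factory defaults):

from peft import LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM

# Tiny model purely for illustration.
model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")
config = LoraConfig(task_type=TaskType.CAUSAL_LM, r=8, lora_alpha=16, target_modules=["c_attn"])
model = get_peft_model(model, config)
model.print_trainable_parameters()  # only the LoRA matrices are trainable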
src/llamafactory/train/dpo/trainer.py

@@ -111,6 +111,7 @@ class CustomDPOTrainer(DPOTrainer):
 
         if self.bco_gemma >= 1e-6:
             from trl.trainer import RunningMoments
+
             self.running = RunningMoments(self.accelerator)
 
     @override
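self.running keeps a moving baseline over batch rewards, which bco_loss below subtracts as delta. A hypothetical minimal stand-in for trl's RunningMoments (the real class also tracks variance and synchronizes across processes via the accelerator):

class RunningMoments:
    """Incrementally tracked mean, standing in for trl.trainer.RunningMoments."""

    def __init__(self) -> None:
        self.mean, self.count = 0.0, 0

    def update(self, value: float) -> None:
        self.count += 1
        self.mean += (value - self.mean) / self.count  # incremental mean update

rm = RunningMoments()
for reward in (0.2, 0.4, 0.9):
    rm.update(reward)
print(round(rm.mean, 4))  # 0.5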
@@ -161,14 +162,14 @@
         chosen_logps: "torch.Tensor",
         rejected_logps: "torch.Tensor",
         reference_chosen_logps: "torch.Tensor",
-        reference_rejected_logps: "torch.Tensor"
+        reference_rejected_logps: "torch.Tensor",
     ) -> "torch.Tensor":
         chosen_logratios = chosen_logps - reference_chosen_logps
         rejected_logratios = rejected_logps - reference_rejected_logps
         chosen_rewards = self.beta * chosen_logratios
         rejected_rewards = self.beta * rejected_logratios
         rewards = torch.cat((chosen_rewards, rejected_rewards), 0).mean().detach()
         self.running.update(rewards)  # update baseline
         delta = self.running.mean
         bco_loss = -F.logsigmoid((self.beta * chosen_logratios) - delta) - F.logsigmoid(
             -(self.beta * rejected_logratios - delta)
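Apart from the trailing-comma fix on the signature, the hunk above is unchanged; for reference, the quantity it assembles corresponds to the BCO pairwise objective, sketched in LaTeX below with beta as the DPO temperature and delta as the running-moments baseline:

\mathcal{L}_{\mathrm{BCO}}
  = -\log \sigma\!\bigl(\beta \,(\log \pi_\theta(y_w \mid x) - \log \pi_{\mathrm{ref}}(y_w \mid x)) - \delta\bigr)
    - \log \sigma\!\bigl(-\bigl[\beta \,(\log \pi_\theta(y_l \mid x) - \log \pi_{\mathrm{ref}}(y_l \mid x)) - \delta\bigr]\bigr)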
@@ -195,15 +196,12 @@ class CustomDPOTrainer(DPOTrainer):
             rejected_rewards = self.beta * policy_rejected_logps.to(self.accelerator.device).detach()
         else:
             losses, chosen_rewards, rejected_rewards = self.dpo_loss(
                 policy_chosen_logps, policy_rejected_logps, reference_chosen_logps, reference_rejected_logps
             )
 
             if self.bco_gemma > 1e-6:
                 bco_losses = self.bco_loss(
-                    policy_chosen_logps,
-                    policy_rejected_logps,
-                    reference_chosen_logps,
-                    reference_rejected_logps
+                    policy_chosen_logps, policy_rejected_logps, reference_chosen_logps, reference_rejected_logps
                 )
                 losses += bco_losses * self.bco_gemma
 
@@ -288,7 +286,7 @@ class CustomDPOTrainer(DPOTrainer):
             losses += self.ftx_gamma * sft_loss
         if self.bco_gemma > 1e-6:
             # re-weigthing for MPO
-            losses /= (self.ftx_gamma + self.bco_gemma + 1.0)
+            losses /= self.ftx_gamma + self.bco_gemma + 1.0
 
         prefix = "eval_" if train_eval == "eval" else ""
         metrics[f"{prefix}rewards/chosen"] = chosen_rewards.mean().item()
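Dropping the parentheses above is purely stylistic: /= binds the whole right-hand side either way. The re-weighting itself divides the combined DPO, SFT, and BCO terms by the sum of their coefficients plus one, so the mixture stays a weighted average rather than growing with each added term. A numeric sketch with made-up values:

# Hypothetical coefficient and loss values, for illustration only.
ftx_gamma, bco_gemma = 0.1, 0.5
dpo_loss, sft_loss, bco_loss = 0.8, 1.2, 0.6

total = dpo_loss + ftx_gamma * sft_loss + bco_gemma * bco_loss
total /= ftx_gamma + bco_gemma + 1.0  # same normalization as the diff
print(round(total, 4))  # 0.7625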