Mirror of https://github.com/hiyouga/LLaMA-Factory.git (synced 2025-10-14 15:52:49 +08:00)
Commit 39169986ef (parent 86ebb219d6)
@@ -107,20 +107,22 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/

## Changelog

+[25/04/16] We supported fine-tuning the **[InternVL3](https://huggingface.co/OpenGVLab/InternVL3-8B)** model. See [PR #7258](https://github.com/hiyouga/LLaMA-Factory/pull/7258) to get started.
+
[25/04/14] We supported fine-tuning the **[GLM-Z1](https://huggingface.co/THUDM/GLM-Z1-9B-0414)** and **[Kimi-VL](https://huggingface.co/moonshotai/Kimi-VL-A3B-Instruct)** models.

[25/04/06] We supported fine-tuning the **[Llama 4](https://ai.meta.com/blog/llama-4-multimodal-intelligence/)** model. See [PR #7611](https://github.com/hiyouga/LLaMA-Factory/pull/7611) to get started.

[25/03/31] We supported fine-tuning the **[Qwen2.5 Omni](https://qwenlm.github.io/blog/qwen2.5-omni/)** model. See [PR #7537](https://github.com/hiyouga/LLaMA-Factory/pull/7537) to get started.

+<details><summary>Full Changelog</summary>
+
[25/03/15] We supported **[SGLang](https://github.com/sgl-project/sglang)** as an inference backend. Try `infer_backend: sglang` to accelerate inference.

[25/03/12] We supported fine-tuning the **[Gemma 3](https://huggingface.co/blog/gemma3)** model.

[25/02/24] Announcing **[EasyR1](https://github.com/hiyouga/EasyR1)**, an efficient, scalable, multi-modality RL training framework for GRPO training.

-<details><summary>Full Changelog</summary>
-
[25/02/11] We supported saving the **[Ollama](https://github.com/ollama/ollama)** modelfile when exporting the model checkpoints. See [examples](examples/README.md) for usage.

[25/02/05] We supported fine-tuning the **[Qwen2-Audio](https://huggingface.co/Qwen/Qwen2-Audio-7B-Instruct)** and **[MiniCPM-o-2.6](https://huggingface.co/openbmb/MiniCPM-o-2_6)** models on audio understanding tasks.

@@ -247,7 +249,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
| [Hunyuan](https://huggingface.co/tencent/) | 7B | hunyuan |
| [Index](https://huggingface.co/IndexTeam) | 1.9B | index |
| [InternLM 2-3](https://huggingface.co/internlm) | 7B/8B/20B | intern2 |
-| [InternVL2.5-3](https://huggingface.co/OpenGVLab/InternVL)\*\* | 1B/2B/4B/8B/9B/14B/26B/38B/78B | intern_vl |
+| [InternVL 2.5-3](https://huggingface.co/OpenGVLab)\*\* | 1B/2B/4B/8B/9B/14B/26B/38B/78B | intern_vl |
| [Kimi-VL](https://huggingface.co/moonshotai) | 16B | kimi_vl |
| [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |
| [Llama 2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 |

@@ -110,20 +110,22 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc

## Changelog

+[25/04/16] We supported fine-tuning the **[InternVL3](https://huggingface.co/OpenGVLab/InternVL3-8B)** model. See [PR #7258](https://github.com/hiyouga/LLaMA-Factory/pull/7258) for usage.
+
[25/04/14] We supported fine-tuning the **[GLM-Z1](https://huggingface.co/THUDM/GLM-Z1-9B-0414)** and **[Kimi-VL](https://huggingface.co/moonshotai/Kimi-VL-A3B-Instruct)** models.

[25/04/06] We supported fine-tuning the **[Llama 4](https://ai.meta.com/blog/llama-4-multimodal-intelligence/)** model. See [PR #7611](https://github.com/hiyouga/LLaMA-Factory/pull/7611) for usage.

[25/03/31] We supported fine-tuning the **[Qwen2.5 Omni](https://qwenlm.github.io/blog/qwen2.5-omni/)** model. See [PR #7537](https://github.com/hiyouga/LLaMA-Factory/pull/7537) for usage.

+<details><summary>Expand Changelog</summary>
+
[25/03/15] We supported the **[SGLang](https://github.com/sgl-project/sglang)** inference backend. Use `infer_backend: sglang` to enable it.

[25/03/12] We supported fine-tuning the **[Gemma 3](https://huggingface.co/blog/gemma3)** model.

[25/02/24] We announced the open-sourcing of **[EasyR1](https://github.com/hiyouga/EasyR1)**, an efficient and scalable multi-modality RL training framework that supports efficient GRPO training.

-<details><summary>Expand Changelog</summary>
-
[25/02/11] We supported saving the **[Ollama](https://github.com/ollama/ollama)** modelfile when exporting models. See [examples](examples/README_zh.md) for detailed usage.

[25/02/05] We supported fine-tuning the **[Qwen2-Audio](https://huggingface.co/Qwen/Qwen2-Audio-7B-Instruct)** and **[MiniCPM-o-2.6](https://huggingface.co/openbmb/MiniCPM-o-2_6)** models on audio understanding tasks.

@@ -250,7 +252,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
| [Hunyuan](https://huggingface.co/tencent/) | 7B | hunyuan |
| [Index](https://huggingface.co/IndexTeam) | 1.9B | index |
| [InternLM 2-3](https://huggingface.co/internlm) | 7B/8B/20B | intern2 |
-| [InternVL2.5-3](https://huggingface.co/OpenGVLab/InternVL)\*\* | 1B/2B/4B/8B/9B/14B/26B/38B/78B | intern_vl |
+| [InternVL 2.5-3](https://huggingface.co/OpenGVLab)\*\* | 1B/2B/4B/8B/9B/14B/26B/38B/78B | intern_vl |
| [Kimi-VL](https://huggingface.co/moonshotai) | 16B | kimi_vl |
| [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |
| [Llama 2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 |
@ -17,16 +17,8 @@ r"""Efficient fine-tuning of large language models.
|
|||||||
Level:
|
Level:
|
||||||
api, webui > chat, eval, train > data, model > hparams > extras
|
api, webui > chat, eval, train > data, model > hparams > extras
|
||||||
|
|
||||||
Dependency graph:
|
|
||||||
transformers>=4.41.2,<=4.43.0,!=4.46.*,!=4.47.*,!=4.48.0
|
|
||||||
datasets>=2.16.0,<=3.5.0
|
|
||||||
accelerate>=0.34.0,<=1.6.0
|
|
||||||
peft>=0.14.0,<=0.15.1
|
|
||||||
trl>=0.8.6,<=0.9.6
|
|
||||||
|
|
||||||
Disable version checking: DISABLE_VERSION_CHECK=1
|
Disable version checking: DISABLE_VERSION_CHECK=1
|
||||||
Enable VRAM recording: RECORD_VRAM=1
|
Enable VRAM recording: RECORD_VRAM=1
|
||||||
Force check imports: FORCE_CHECK_IMPORTS=1
|
|
||||||
Force using torchrun: FORCE_TORCHRUN=1
|
Force using torchrun: FORCE_TORCHRUN=1
|
||||||
Set logging verbosity: LLAMAFACTORY_VERBOSITY=WARN
|
Set logging verbosity: LLAMAFACTORY_VERBOSITY=WARN
|
||||||
Use modelscope: USE_MODELSCOPE_HUB=1
|
Use modelscope: USE_MODELSCOPE_HUB=1
|
||||||
|
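The switches listed in this docstring are ordinary environment variables. A hypothetical usage sketch follows; the variable names come from the docstring itself, while setting them from Python and reading them at import time are assumptions:

```python
# Hypothetical sketch: configure the documented environment switches before
# importing llamafactory, assuming they are read at startup.
import os

os.environ["LLAMAFACTORY_VERBOSITY"] = "WARN"  # set logging verbosity
os.environ["DISABLE_VERSION_CHECK"] = "1"      # disable version checking
os.environ["USE_MODELSCOPE_HUB"] = "1"         # use modelscope instead of the default hub

import llamafactory  # noqa: E402  (import after the environment is configured)
```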

@@ -21,7 +21,7 @@ import re
from copy import deepcopy
from dataclasses import dataclass
from io import BytesIO
-from typing import TYPE_CHECKING, BinaryIO, Literal, Optional, TypedDict, Union
+from typing import TYPE_CHECKING, Any, BinaryIO, Literal, Optional, TypedDict, Union

import numpy as np
import torch

@@ -86,7 +86,7 @@ if TYPE_CHECKING:
    pass


-def _concatenate_list(input_list):
+def _concatenate_list(input_list: list[Any]) -> Union[list[Any], "NDArray", "torch.Tensor"]:
    r"""Concatenate a list of lists, numpy arrays or torch tensors.

    Returns:
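For orientation, here is a minimal sketch of what a helper with the annotated signature above might do, dispatching on the type of the first element. It is an illustration under that assumption, not necessarily the exact body of `_concatenate_list`:

```python
import numpy as np
import torch


def _concatenate_list(input_list):
    r"""Concatenate a list of lists, numpy arrays or torch tensors along the first axis."""
    if isinstance(input_list[0], list):
        # flatten a list of lists into a single list
        return [item for sublist in input_list for item in sublist]
    elif isinstance(input_list[0], np.ndarray):
        return np.concatenate(input_list, axis=0)
    elif isinstance(input_list[0], torch.Tensor):
        return torch.cat(input_list, dim=0)
    else:
        raise TypeError(f"Unsupported element type: {type(input_list[0])}")
```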

@@ -89,7 +89,7 @@ def check_version(requirement: str, mandatory: bool = False) -> None:

def check_dependencies() -> None:
    r"""Check the version of the required packages."""
-    check_version("transformers>=4.43.0,<=4.51.3,!=4.46.0,!=4.46.1,!=4.46.2,!=4.46.3,!=4.47.0,!=4.47.1,!=4.48.0")
+    check_version("transformers>=4.45.0,<=4.51.3,!=4.46.0,!=4.46.1,!=4.46.2,!=4.46.3,!=4.47.0,!=4.47.1,!=4.48.0")
    check_version("datasets>=2.16.0,<=3.5.0")
    check_version("accelerate>=0.34.0,<=1.6.0")
    check_version("peft>=0.14.0,<=0.15.1")
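A minimal sketch of how a `check_version`-style helper could enforce requirement strings like the ones above, assuming the `packaging` library is available; the actual helper in LLaMA-Factory may be implemented differently (for instance by delegating to a transformers utility):

```python
import os
from importlib.metadata import version

from packaging.requirements import Requirement


def check_version(requirement: str, mandatory: bool = False) -> None:
    """Warn, or raise if mandatory, when an installed package violates the requirement."""
    if os.getenv("DISABLE_VERSION_CHECK", "0").lower() in ("1", "true") and not mandatory:
        return  # the module docstring advertises DISABLE_VERSION_CHECK=1 for this purpose

    req = Requirement(requirement)
    installed = version(req.name)
    if not req.specifier.contains(installed, prereleases=True):
        message = f"{req.name}=={installed} does not satisfy '{requirement}'."
        if mandatory:
            raise RuntimeError(message)

        print(f"Warning: {message}")
```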

@@ -23,7 +23,6 @@ from typing import TYPE_CHECKING, Optional
import torch
import torch.nn as nn
import transformers
-from transformers.models.llama.modeling_llama import Cache, apply_rotary_pos_emb, repeat_kv

from ...extras import logging
from ...extras.constants import SUPPORTED_CLASS_FOR_S2ATTN

@@ -32,7 +31,15 @@ from ...extras.packages import is_transformers_version_greater_than


if not is_transformers_version_greater_than("4.48.0"):
-    from transformers.models.llama.modeling_llama import LlamaAttention, LlamaFlashAttention2, LlamaSdpaAttention
+    from transformers.modeling_flash_attention_utils import _flash_attention_forward
+    from transformers.models.llama.modeling_llama import (
+        Cache,
+        LlamaAttention,
+        LlamaFlashAttention2,
+        LlamaSdpaAttention,
+        apply_rotary_pos_emb,
+        repeat_kv,
+    )


if TYPE_CHECKING:
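The import gate above relies on a version predicate from `extras.packages`. A rough sketch of such a helper, under the assumption that it compares installed package metadata; the real implementation may cache or parse differently:

```python
from functools import lru_cache
from importlib.metadata import version

from packaging.version import Version


@lru_cache
def is_transformers_version_greater_than(content: str) -> bool:
    # True when the installed transformers release is at least `content`
    return Version(version("transformers")) >= Version(content)
```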

@@ -206,9 +213,6 @@ def llama_flash_attention_2_forward(
        if attention_mask is not None:
            attention_mask = attention_mask[:, :groupsz].repeat(num_groups, 1)

-    if is_transformers_version_greater_than("4.43.0"):
-        from transformers.modeling_flash_attention_utils import _flash_attention_forward
-
    attn_output: torch.Tensor = _flash_attention_forward(
        query_states,
        key_states,
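The `attention_mask[:, :groupsz].repeat(num_groups, 1)` line kept above reshapes the mask for the grouped (shifted sparse) attention used by this patch. A tiny stand-alone illustration of the resulting shapes, with toy values rather than code from the repository:

```python
import torch

bsz, q_len, groupsz = 2, 8, 4          # toy dimensions
num_groups = q_len // groupsz          # each sequence is split into num_groups blocks

attention_mask = torch.ones(bsz, q_len, dtype=torch.long)
shifted_mask = attention_mask[:, :groupsz].repeat(num_groups, 1)

print(shifted_mask.shape)  # torch.Size([4, 4]) == (bsz * num_groups, groupsz)
```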

@@ -220,10 +224,6 @@ def llama_flash_attention_2_forward(
        use_top_left_mask=self._flash_attn_uses_top_left_mask,
        is_causal=self.is_causal,
    )
-    else:
-        attn_output: torch.Tensor = self._flash_attention_forward(
-            query_states, key_states, value_states, attention_mask, query_states.size(1), dropout=dropout_rate
-        )

    if getattr(self.config, "group_size_ratio", None) and self.training:  # shift back
        attn_output.reshape(bsz, q_len, self.num_heads, self.head_dim)

@@ -350,7 +350,7 @@ def llama_sdpa_attention_forward(


def _apply_llama_patch() -> None:
-    check_version("transformers>=4.43.0,<4.48.0", mandatory=True)
+    check_version("transformers>=4.45.0,<4.48.0", mandatory=True)
    LlamaAttention.forward = llama_attention_forward
    LlamaFlashAttention2.forward = llama_flash_attention_2_forward
    LlamaSdpaAttention.forward = llama_sdpa_attention_forward

@@ -43,11 +43,6 @@ import torch
import torch.nn.functional as F

from ...extras import logging
-from ...extras.packages import is_transformers_version_greater_than
-
-
-if is_transformers_version_greater_than("4.43.0"):
-    import transformers.modeling_flash_attention_utils


if TYPE_CHECKING:

@@ -116,5 +111,7 @@ def configure_packing(model_args: "ModelArguments", is_trainable: bool) -> None:
    if not is_trainable or not model_args.block_diag_attn:
        return

+    import transformers.modeling_flash_attention_utils
+
    transformers.modeling_flash_attention_utils._get_unpad_data = get_unpad_data
    logger.info_rank0("Using block diagonal attention for sequence packing without cross-attention.")
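The patch above re-points transformers' `_get_unpad_data` at a custom `get_unpad_data` when `block_diag_attn` is enabled. A rough sketch of what such a replacement could look like, assuming the data collator writes per-token sequence ids (1, 1, 2, 2, ..., 0 for padding) into `attention_mask` instead of a 0/1 mask; not necessarily the exact implementation in packing.py:

```python
import torch
import torch.nn.functional as F


def get_unpad_data(attention_mask: "torch.Tensor") -> tuple["torch.Tensor", "torch.Tensor", int]:
    """attention_mask holds per-token sequence ids (0 = padding), shape (batch, seq_len)."""
    # per-sample lengths of every packed sub-sequence, concatenated over the batch
    seqlens_in_batch = torch.cat(
        [torch.bincount(row[row > 0])[1:] for row in attention_mask], dim=0
    ).to(torch.int32)
    indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
    max_seqlen_in_batch = int(seqlens_in_batch.max())
    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0))
    return indices, cu_seqlens, max_seqlen_in_batch
```

With these cumulative-length statistics, flash attention unpads each packed sub-sequence separately, which is what makes the attention block-diagonal rather than letting packed sequences attend to one another.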

@@ -40,6 +40,11 @@ class CustomTrainer(Trainer):
            kwargs["processing_class"] = kwargs.pop("tokenizer")

        super().__init__(**kwargs)
+        if processor is not None:
+            # avoid wrong loss under gradient accumulation
+            # https://github.com/huggingface/transformers/pull/36044#issuecomment-2746657112
+            self.model_accepts_loss_kwargs = False
+
        self.finetuning_args = finetuning_args

        if processor is not None:
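The added comment points at a known loss-scaling pitfall under gradient accumulation (see the linked transformers discussion); the flag change opts the multimodal path out of transformers' loss-kwargs handling. A self-contained toy illustration, not LLaMA-Factory code, of why a mean taken per micro-batch differs from the true per-token mean when micro-batches contain different numbers of valid tokens:

```python
import torch

# two "micro-batches" of per-token losses with different numbers of valid tokens
token_losses = [torch.tensor([1.0, 1.0, 1.0, 1.0]), torch.tensor([3.0])]

per_microbatch_mean = torch.stack([t.mean() for t in token_losses]).mean()
global_token_mean = torch.cat(token_losses).sum() / sum(t.numel() for t in token_losses)

print(per_microbatch_mean.item())  # 2.0, biased toward the short micro-batch
print(global_token_mean.item())    # 1.4, the true per-token average
```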

@@ -60,6 +60,8 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer):

        super().__init__(**kwargs)
        if processor is not None:
+            # avoid wrong loss under gradient accumulation
+            # https://github.com/huggingface/transformers/pull/36044#issuecomment-2746657112
            self.model_accepts_loss_kwargs = False

        self.finetuning_args = finetuning_args

@@ -1,2 +1,2 @@
-# change if test fails
+# change if test fails or cache is outdated
0.9.3.103