mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2026-06-17 20:58:54 +08:00
[misc] code lint (#10439)
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
@@ -45,7 +45,7 @@ def apply_liger_kernel(
|
||||
from liger_kernel.transformers import apply_liger_kernel_to_gemma3 as apply_liger_kernel
|
||||
elif model_type == "gemma3_text":
|
||||
from liger_kernel.transformers import apply_liger_kernel_to_gemma3_text as apply_liger_kernel
|
||||
elif model_type in ["glm", "glm4"]: # for glm4-9b, glm4-32B respectively
|
||||
elif model_type in ["glm", "glm4"]: # for glm4-9b, glm4-32B respectively
|
||||
from liger_kernel.transformers import apply_liger_kernel_to_glm4 as apply_liger_kernel
|
||||
elif model_type == "glm4v":
|
||||
from liger_kernel.transformers import apply_liger_kernel_to_glm4v as apply_liger_kernel
|
||||
|
||||
@@ -44,15 +44,16 @@ class CompositeModel:
|
||||
language_model_keys: list[str]
|
||||
lora_conflict_keys: list[str]
|
||||
|
||||
|
||||
def get_projectors(self, module: "torch.nn.Module") -> list["torch.nn.Module"]:
|
||||
mm_projectors: list[torch.nn.Module] = []
|
||||
for projector_key in self.projector_keys:
|
||||
project_module = module
|
||||
for key in projector_key.split("."):
|
||||
project_module = getattr(project_module, key, None)
|
||||
if project_module is None: # i,e gemma4 bigger one, there is no embed_audio
|
||||
logger.warning_rank0(f"Projector key {projector_key} not found in module {module.__class__.__name__}.")
|
||||
if project_module is None: # i,e gemma4 bigger one, there is no embed_audio
|
||||
logger.warning_rank0(
|
||||
f"Projector key {projector_key} not found in module {module.__class__.__name__}."
|
||||
)
|
||||
break
|
||||
|
||||
if project_module is not None:
|
||||
|
||||
@@ -119,7 +119,7 @@ def patch_qwen3_5_forward(model: "PreTrainedModel") -> None:
|
||||
cache_params=past_key_values,
|
||||
cache_position=cache_position,
|
||||
attention_mask=attention_mask,
|
||||
position_ids=position_ids, # passing position_ids to linear attention
|
||||
position_ids=position_ids, # passing position_ids to linear attention
|
||||
)
|
||||
elif self.layer_type == "full_attention":
|
||||
hidden_states, _ = self.self_attn(
|
||||
@@ -163,11 +163,7 @@ def patch_qwen3_5_forward(model: "PreTrainedModel") -> None:
|
||||
position_ids = position_ids[0]
|
||||
|
||||
# `prepare_fa_kwargs_from_position_ids` would crash on None; guard for safety.
|
||||
cu_seqlens = (
|
||||
prepare_fa_kwargs_from_position_ids(position_ids)[0][0]
|
||||
if position_ids is not None
|
||||
else None
|
||||
)
|
||||
cu_seqlens = prepare_fa_kwargs_from_position_ids(position_ids)[0][0] if position_ids is not None else None
|
||||
|
||||
# FLA varlen kernels expect [B, T, D] layout, not [B, D, T] like the
|
||||
# standard causal-conv1d path that the upstream forward uses.
|
||||
@@ -232,6 +228,7 @@ def patch_qwen3_5_forward(model: "PreTrainedModel") -> None:
|
||||
|
||||
if model.config.architectures[0] == "Qwen3_5ForConditionalGeneration":
|
||||
from transformers.models.qwen3_5.modeling_qwen3_5 import Qwen3_5DecoderLayer, Qwen3_5GatedDeltaNet
|
||||
|
||||
Qwen3_5DecoderLayer.forward = _patched_decoder_forward
|
||||
Qwen3_5GatedDeltaNet.forward = _patch_gdn_forward
|
||||
elif model.config.architectures[0] == "Qwen3_5MoeForConditionalGeneration":
|
||||
@@ -239,6 +236,7 @@ def patch_qwen3_5_forward(model: "PreTrainedModel") -> None:
|
||||
Qwen3_5MoeDecoderLayer,
|
||||
Qwen3_5MoeGatedDeltaNet,
|
||||
)
|
||||
|
||||
Qwen3_5MoeDecoderLayer.forward = _patched_decoder_forward
|
||||
Qwen3_5MoeGatedDeltaNet.forward = _patch_gdn_forward
|
||||
|
||||
|
||||
Reference in New Issue
Block a user