[misc] code lint (#10439)

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
Kingsley
2026-04-27 14:07:31 +08:00
committed by GitHub
parent 9a0cfdccfa
commit 99464b3d03
16 changed files with 143 additions and 80 deletions

View File

@@ -45,7 +45,7 @@ def apply_liger_kernel(
from liger_kernel.transformers import apply_liger_kernel_to_gemma3 as apply_liger_kernel
elif model_type == "gemma3_text":
from liger_kernel.transformers import apply_liger_kernel_to_gemma3_text as apply_liger_kernel
elif model_type in ["glm", "glm4"]: # for glm4-9b, glm4-32B respectively
elif model_type in ["glm", "glm4"]: # for glm4-9b, glm4-32B respectively
from liger_kernel.transformers import apply_liger_kernel_to_glm4 as apply_liger_kernel
elif model_type == "glm4v":
from liger_kernel.transformers import apply_liger_kernel_to_glm4v as apply_liger_kernel

View File

@@ -44,15 +44,16 @@ class CompositeModel:
language_model_keys: list[str]
lora_conflict_keys: list[str]
def get_projectors(self, module: "torch.nn.Module") -> list["torch.nn.Module"]:
mm_projectors: list[torch.nn.Module] = []
for projector_key in self.projector_keys:
project_module = module
for key in projector_key.split("."):
project_module = getattr(project_module, key, None)
if project_module is None: # i.e. the larger gemma4 variant, which has no embed_audio
logger.warning_rank0(f"Projector key {projector_key} not found in module {module.__class__.__name__}.")
if project_module is None: # i.e. the larger gemma4 variant, which has no embed_audio
logger.warning_rank0(
f"Projector key {projector_key} not found in module {module.__class__.__name__}."
)
break
if project_module is not None:

View File

@@ -119,7 +119,7 @@ def patch_qwen3_5_forward(model: "PreTrainedModel") -> None:
cache_params=past_key_values,
cache_position=cache_position,
attention_mask=attention_mask,
position_ids=position_ids, # passing position_ids to linear attention
position_ids=position_ids, # passing position_ids to linear attention
)
elif self.layer_type == "full_attention":
hidden_states, _ = self.self_attn(
@@ -163,11 +163,7 @@ def patch_qwen3_5_forward(model: "PreTrainedModel") -> None:
position_ids = position_ids[0]
# `prepare_fa_kwargs_from_position_ids` would crash on None; guard for safety.
cu_seqlens = (
prepare_fa_kwargs_from_position_ids(position_ids)[0][0]
if position_ids is not None
else None
)
cu_seqlens = prepare_fa_kwargs_from_position_ids(position_ids)[0][0] if position_ids is not None else None
# FLA varlen kernels expect [B, T, D] layout, not [B, D, T] like the
# standard causal-conv1d path that the upstream forward uses.
@@ -232,6 +228,7 @@ def patch_qwen3_5_forward(model: "PreTrainedModel") -> None:
if model.config.architectures[0] == "Qwen3_5ForConditionalGeneration":
from transformers.models.qwen3_5.modeling_qwen3_5 import Qwen3_5DecoderLayer, Qwen3_5GatedDeltaNet
Qwen3_5DecoderLayer.forward = _patched_decoder_forward
Qwen3_5GatedDeltaNet.forward = _patch_gdn_forward
elif model.config.architectures[0] == "Qwen3_5MoeForConditionalGeneration":
@@ -239,6 +236,7 @@ def patch_qwen3_5_forward(model: "PreTrainedModel") -> None:
Qwen3_5MoeDecoderLayer,
Qwen3_5MoeGatedDeltaNet,
)
Qwen3_5MoeDecoderLayer.forward = _patched_decoder_forward
Qwen3_5MoeGatedDeltaNet.forward = _patch_gdn_forward