[fa2] fix IMA when training qwen3_5 (#10448)

This commit is contained in:
Kingsley
2026-04-29 20:20:55 +08:00
committed by GitHub
parent 50945ef850
commit 3475198d1e

View File

@@ -125,7 +125,7 @@ def patch_qwen3_5_forward(model: "PreTrainedModel") -> None:
hidden_states, _ = self.self_attn(
hidden_states=hidden_states,
attention_mask=attention_mask,
-position_ids=position_ids,
+position_ids=position_ids[None, 0], # keep [1, B, L]
past_key_values=past_key_values,
cache_position=cache_position,
position_embeddings=position_embeddings,