[npu] Redirect SDPA to torch_npu.npu_fusion_attention (opt-in, ZeRO-3 safe, no impact off NPU) (#8972)

h7878778h
2025-09-30 18:11:31 +08:00
committed by GitHub
parent a04d777d7f
commit 09dedf144f
2 changed files with 148 additions and 0 deletions


@@ -188,6 +188,23 @@ def patch_model(
    if not model_args.use_unsloth:
        print_attn_implementation(model.config)

    # ======== NPU fused attention redirect: SDPA -> torch_npu.npu_fusion_attention ========
    # Placed after all structural modifications and before DeepSpeed/Trainer initialization;
    # it does not modify any Module/_parameters, so it is safe for ZeRO-3 + offload.
    try:
        import os
        import torch

        if hasattr(torch, "npu") and torch.npu.is_available() and os.environ.get("NPU_FA_DISABLE", "0") != "1":
            from .model_utils.sdpa_npu_redirect import apply_sdpa_npu_redirect

            apply_sdpa_npu_redirect(verbose=not model_args.use_unsloth)
            logger.info_rank0("[sdpa_npu_redirect] Enabled: SDPA will use Ascend npu_fusion_attention when available.")
    except Exception as e:
        logger.warning_rank0(f"[sdpa_npu_redirect] Failed to enable redirect, will keep native SDPA. Reason: {e}")
    # =====================================================================================

    try:
        model.add_model_tags(["llama-factory"])
    except Exception:
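
The redirected module itself (`model_utils/sdpa_npu_redirect.py`) is not shown in this hunk. For context, below is a minimal sketch of what `apply_sdpa_npu_redirect` could look like, assuming a `torch_npu` build that exposes `npu_fusion_attention(query, key, value, head_num, input_layout, ...)` and returns the attention output as the first element of a tuple; the helper names and mask translation are illustrative, not the committed implementation, and exact keywords vary by torch_npu release.

    # sketch only; assumed API: torch_npu.npu_fusion_attention, names are illustrative
    import torch
    import torch.nn.functional as F

    _native_sdpa = F.scaled_dot_product_attention  # fallback handle, captured before patching


    def _sdpa_via_npu_fusion_attention(query, key, value, attn_mask=None,
                                       dropout_p=0.0, is_causal=False, scale=None):
        # Fall back to native SDPA for anything the fused kernel cannot take.
        if query.device.type != "npu":
            return _native_sdpa(query, key, value, attn_mask=attn_mask,
                                dropout_p=dropout_p, is_causal=is_causal, scale=scale)
        import torch_npu

        head_num = query.shape[1]  # SDPA convention (B, N, S, D), i.e. "BNSD" layout
        if scale is None:
            scale = query.shape[-1] ** -0.5

        atten_mask = None
        if is_causal:
            # npu_fusion_attention drops positions where the mask is True,
            # so a causal mask is the strict upper triangle.
            s_q, s_kv = query.shape[-2], key.shape[-2]
            atten_mask = torch.ones(s_q, s_kv, dtype=torch.bool, device=query.device).triu(1)
        elif attn_mask is not None:
            if attn_mask.dtype != torch.bool:
                # Additive float masks are not translated in this sketch.
                return _native_sdpa(query, key, value, attn_mask=attn_mask,
                                    dropout_p=dropout_p, scale=scale)
            atten_mask = ~attn_mask  # SDPA keeps True positions; the NPU kernel masks them

        out = torch_npu.npu_fusion_attention(
            query, key, value, head_num, "BNSD",
            atten_mask=atten_mask, scale=scale, keep_prob=1.0 - dropout_p,
        )
        return out[0]  # first tuple element is the attention output


    def apply_sdpa_npu_redirect(verbose: bool = False) -> None:
        """Monkey-patch F.scaled_dot_product_attention; no Module state is touched."""
        F.scaled_dot_product_attention = _sdpa_via_npu_fusion_attention
        if verbose:
            print("[sdpa_npu_redirect] scaled_dot_product_attention now routes to NPU.")

Because only the module-level function is swapped, ZeRO-3 parameter partitioning never sees a changed module tree, which is what the commit title means by "ZeRO-3 safe". The patch in `patch_model` above also makes the feature easy to bypass: setting `NPU_FA_DISABLE=1` in the environment skips the redirect, and the surrounding `try/except` keeps native SDPA on any failure, so runs off NPU are unaffected.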