Mirror of https://github.com/hiyouga/LLaMA-Factory.git, synced 2025-12-29 02:00:36 +08:00
[model] remove npu sdpa patch (#9368)
Co-authored-by: frozenleaves <frozen@Mac.local>
@@ -193,23 +193,6 @@ def patch_model(
     if not model_args.use_unsloth:
         print_attn_implementation(model.config)
 
-    # ======== NPU fused attention redirect: SDPA -> torch_npu.npu_fusion_attention ========
-    # Place after all structural modifications and before DeepSpeed/Trainer initialization;
-    # does not modify any Module/_parameters, safe for ZeRO-3 + offload.
-    try:
-        import os
-
-        import torch
-
-        if hasattr(torch, "npu") and torch.npu.is_available() and os.environ.get("NPU_FA_DISABLE", "0") != "1":
-            from .model_utils.sdpa_npu_redirect import apply_sdpa_npu_redirect
-
-            apply_sdpa_npu_redirect(verbose=not model_args.use_unsloth)
-            logger.info_rank0("[sdpa_npu_redirect] Enabled: SDPA will use Ascend npu_fusion_attention when available.")
-    except Exception as e:
-        logger.warning_rank0(f"[sdpa_npu_redirect] Failed to enable redirect, will keep native SDPA. Reason: {e}")
-    # =====================================================================================
-
     try:
         model.add_model_tags(["llama-factory"])
     except Exception:
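For reference, the removed call site imported apply_sdpa_npu_redirect from model_utils.sdpa_npu_redirect, a helper that rerouted PyTorch's SDPA entry point to Ascend's fused attention kernel. Below is a minimal, hypothetical sketch of how such a redirect can be wired up; it is not the original module removed by this commit. It assumes torch_npu provides npu_fusion_attention (the exact keyword names vary across torch_npu releases) and falls back to native SDPA whenever the fused path does not apply.

# Hypothetical reconstruction for illustration only; not the module removed by this commit.
import os

import torch
import torch.nn.functional as F

_native_sdpa = F.scaled_dot_product_attention


def _sdpa_or_npu(query, key, value, attn_mask=None, dropout_p=0.0, is_causal=False, scale=None):
    # Only take the fused path for plain, non-causal, non-dropout attention on NPU tensors;
    # everything else falls back to the native implementation.
    if query.device.type != "npu" or attn_mask is not None or is_causal or dropout_p > 0.0:
        return _native_sdpa(query, key, value, attn_mask=attn_mask, dropout_p=dropout_p, is_causal=is_causal, scale=scale)

    import torch_npu  # assumed to be installed on Ascend hosts

    head_num = query.size(1)  # inputs are (batch, heads, seq, head_dim), i.e. "BNSD" layout
    softmax_scale = scale if scale is not None else query.size(-1) ** -0.5
    # npu_fusion_attention returns a tuple; index 0 is the attention output
    # (keyword names follow recent torch_npu releases and may need adjusting).
    return torch_npu.npu_fusion_attention(
        query, key, value, head_num, input_layout="BNSD", scale=softmax_scale
    )[0]


def apply_sdpa_npu_redirect(verbose: bool = True) -> None:
    # Swap the functional entry point so SDPA calls made through
    # torch.nn.functional route into the NPU-aware wrapper.
    if os.environ.get("NPU_FA_DISABLE", "0") == "1":
        return
    F.scaled_dot_product_attention = _sdpa_or_npu
    if verbose:
        print("[sdpa_npu_redirect] SDPA redirected to torch_npu.npu_fusion_attention")

Because this patches the module attribute, call sites that already hold a direct reference to the native function are unaffected, which is why the removed call site applied the redirect early, before DeepSpeed/Trainer initialization. As in the removed code, exporting NPU_FA_DISABLE=1 keeps the native SDPA path.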