Mirror of https://github.com/hiyouga/LLaMA-Factory.git, synced 2025-12-29 02:00:36 +08:00
[model] remove npu sdpa patch (#9368)
Co-authored-by: frozenleaves <frozen@Mac.local>
@@ -193,23 +193,6 @@ def patch_model(
     if not model_args.use_unsloth:
         print_attn_implementation(model.config)
 
-    # ======== NPU fused attention redirect: SDPA -> torch_npu.npu_fusion_attention ========
-    # Place after all structural modifications and before DeepSpeed/Trainer initialization;
-    # does not modify any Module/_parameters, safe for ZeRO-3 + offload.
-    try:
-        import os
-
-        import torch
-
-        if hasattr(torch, "npu") and torch.npu.is_available() and os.environ.get("NPU_FA_DISABLE", "0") != "1":
-            from .model_utils.sdpa_npu_redirect import apply_sdpa_npu_redirect
-
-            apply_sdpa_npu_redirect(verbose=not model_args.use_unsloth)
-            logger.info_rank0("[sdpa_npu_redirect] Enabled: SDPA will use Ascend npu_fusion_attention when available.")
-    except Exception as e:
-        logger.warning_rank0(f"[sdpa_npu_redirect] Failed to enable redirect, will keep native SDPA. Reason: {e}")
-    # =====================================================================================
-
     try:
         model.add_model_tags(["llama-factory"])
     except Exception:
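For reference, the removed call site imported apply_sdpa_npu_redirect from model_utils.sdpa_npu_redirect, a helper that rerouted PyTorch's SDPA entry point to Ascend's fused attention kernel. Below is a minimal, hypothetical sketch of how such a redirect can be wired up; it is not the original module removed by this commit. It assumes torch_npu provides npu_fusion_attention (the exact keyword names vary across torch_npu releases) and falls back to native SDPA whenever the fused path does not apply.

# Hypothetical reconstruction for illustration only; not the module removed by this commit.
import os

import torch
import torch.nn.functional as F

_native_sdpa = F.scaled_dot_product_attention


def _sdpa_or_npu(query, key, value, attn_mask=None, dropout_p=0.0, is_causal=False, scale=None):
    # Only take the fused path for plain, non-causal, non-dropout attention on NPU tensors;
    # everything else falls back to the native implementation.
    if query.device.type != "npu" or attn_mask is not None or is_causal or dropout_p > 0.0:
        return _native_sdpa(query, key, value, attn_mask=attn_mask, dropout_p=dropout_p, is_causal=is_causal, scale=scale)

    import torch_npu  # assumed to be installed on Ascend hosts

    head_num = query.size(1)  # inputs are (batch, heads, seq, head_dim), i.e. "BNSD" layout
    softmax_scale = scale if scale is not None else query.size(-1) ** -0.5
    # npu_fusion_attention returns a tuple; index 0 is the attention output
    # (keyword names follow recent torch_npu releases and may need adjusting).
    return torch_npu.npu_fusion_attention(
        query, key, value, head_num, input_layout="BNSD", scale=softmax_scale
    )[0]


def apply_sdpa_npu_redirect(verbose: bool = True) -> None:
    # Swap the functional entry point so SDPA calls made through
    # torch.nn.functional route into the NPU-aware wrapper.
    if os.environ.get("NPU_FA_DISABLE", "0") == "1":
        return
    F.scaled_dot_product_attention = _sdpa_or_npu
    if verbose:
        print("[sdpa_npu_redirect] SDPA redirected to torch_npu.npu_fusion_attention")

Because this patches the module attribute, call sites that already hold a direct reference to the native function are unaffected, which is why the removed call site applied the redirect early, before DeepSpeed/Trainer initialization. As in the removed code, exporting NPU_FA_DISABLE=1 keeps the native SDPA path.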