Mirror of https://github.com/hiyouga/LLaMA-Factory.git (synced 2025-12-29 18:20:35 +08:00)
@@ -89,6 +89,7 @@ def get_custom_gradient_checkpointing_func(gradient_checkpointing_func: Callable
             for arg in args:
                 if torch.is_tensor(arg) and torch.is_floating_point(arg):
                     arg.requires_grad_(True)
+                    break  # assume the first tensor is always the hidden states

         return gradient_checkpointing_func(func, *args, **kwargs)

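For context: the hunk touches get_custom_gradient_checkpointing_func, which wraps the gradient-checkpointing callable so that at least one input tensor requires grad before checkpointing runs; without that, torch.utils.checkpoint has no differentiable input when the layer's own weights are frozen (e.g., adapter-only training) and backward through the checkpointed region breaks. Below is a minimal, self-contained sketch of the technique under that assumption; only the loop body is visible in the diff, so the surrounding wrapper structure (the inner function name, the @wraps decoration, the return of the wrapper) is an assumption, not the repository's exact code.

from functools import wraps
from typing import Any, Callable

import torch


def get_custom_gradient_checkpointing_func(gradient_checkpointing_func: Callable) -> Callable:
    # Sketch (assumed structure): wrap a checkpointing callable so that the
    # first floating-point tensor argument requires grad before delegating.

    @wraps(gradient_checkpointing_func)
    def custom_gradient_checkpointing_func(func: Callable, *args: Any, **kwargs: Any) -> Any:
        # Mark the first floating-point tensor as requiring grad so the
        # checkpointed region has a differentiable input even when the
        # module's own parameters are frozen.
        for arg in args:
            if torch.is_tensor(arg) and torch.is_floating_point(arg):
                arg.requires_grad_(True)
                break  # assume the first tensor is always the hidden states

        return gradient_checkpointing_func(func, *args, **kwargs)

    return custom_gradient_checkpointing_func

On the single added line: the break stops after the first floating-point tensor, which by convention is the hidden states in a transformer layer's argument list. Flagging every float argument would be unnecessary work and could mark tensors that should not be differentiable, such as floating-point attention masks.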