diff --git a/src/llamafactory/train/callbacks.py b/src/llamafactory/train/callbacks.py
index 3e351c0f..82a37a76 100644
--- a/src/llamafactory/train/callbacks.py
+++ b/src/llamafactory/train/callbacks.py
@@ -73,7 +73,7 @@ def fix_valuehead_checkpoint(
     if safe_serialization:
         path_to_checkpoint = os.path.join(output_dir, SAFE_WEIGHTS_NAME)
         with safe_open(path_to_checkpoint, framework="pt", device="cpu") as f:
-            state_dict: dict[str, torch.Tensor] = {key: f.get_tensor(key) for key in f.keys()}
+            state_dict: dict[str, torch.Tensor] = {key: f.get_tensor(key).clone() for key in f.keys()}
     else:
         path_to_checkpoint = os.path.join(output_dir, WEIGHTS_NAME)
         state_dict: dict[str, torch.Tensor] = torch.load(path_to_checkpoint, map_location="cpu", weights_only=True)