diff --git a/src/llamafactory/train/callbacks.py b/src/llamafactory/train/callbacks.py index 3e351c0f..82a37a76 100644 --- a/src/llamafactory/train/callbacks.py +++ b/src/llamafactory/train/callbacks.py @@ -73,7 +73,7 @@ def fix_valuehead_checkpoint( if safe_serialization: path_to_checkpoint = os.path.join(output_dir, SAFE_WEIGHTS_NAME) with safe_open(path_to_checkpoint, framework="pt", device="cpu") as f: - state_dict: dict[str, torch.Tensor] = {key: f.get_tensor(key) for key in f.keys()} + state_dict: dict[str, torch.Tensor] = {key: f.get_tensor(key).clone() for key in f.keys()} else: path_to_checkpoint = os.path.join(output_dir, WEIGHTS_NAME) state_dict: dict[str, torch.Tensor] = torch.load(path_to_checkpoint, map_location="cpu", weights_only=True)