diff --git a/src/llamafactory/train/callbacks.py b/src/llamafactory/train/callbacks.py index 4f34791b..c9612e6e 100644 --- a/src/llamafactory/train/callbacks.py +++ b/src/llamafactory/train/callbacks.py @@ -74,6 +74,7 @@ def fix_valuehead_checkpoint( path_to_checkpoint = os.path.join(output_dir, WEIGHTS_NAME) state_dict: Dict[str, torch.Tensor] = torch.load(path_to_checkpoint, map_location="cpu") + os.remove(path_to_checkpoint) decoder_state_dict = {} v_head_state_dict = {} for name, param in state_dict.items(): @@ -91,7 +92,6 @@ def fix_valuehead_checkpoint( else: torch.save(v_head_state_dict, os.path.join(output_dir, V_HEAD_WEIGHTS_NAME)) - os.remove(path_to_checkpoint) logger.info("Value head model saved at: {}".format(output_dir))