Mirror of https://github.com/hiyouga/LLaMA-Factory.git (synced 2025-12-18 04:40:35 +08:00)
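The diff below is a mechanical cleanup: str.format calls are rewritten as f-strings across the trainer callbacks, the DPO/KTO/PPO trainers, the trainer utilities, and the tuner entry points. The rendered strings are identical; only the formatting style changes.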
@@ -92,7 +92,7 @@ def fix_valuehead_checkpoint(
     else:
         torch.save(v_head_state_dict, os.path.join(output_dir, V_HEAD_WEIGHTS_NAME))
 
-    logger.info("Value head model saved at: {}".format(output_dir))
+    logger.info(f"Value head model saved at: {output_dir}")
 
 
 class FixValueHeadModelCallback(TrainerCallback):
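Every hunk follows the same pattern: positional "{}" placeholders become expressions interpolated inside an f-string literal, with identical output. A minimal standalone sketch (the path is illustrative, not from the repo):

output_dir = "saves/llama3-8b/lora"  # hypothetical directory
old_style = "Value head model saved at: {}".format(output_dir)
new_style = f"Value head model saved at: {output_dir}"
assert old_style == new_style  # the refactor is behavior-preserving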
@@ -106,7 +106,7 @@ class FixValueHeadModelCallback(TrainerCallback):
         Event called after a checkpoint save.
         """
         if args.should_save:
-            output_dir = os.path.join(args.output_dir, "{}-{}".format(PREFIX_CHECKPOINT_DIR, state.global_step))
+            output_dir = os.path.join(args.output_dir, f"{PREFIX_CHECKPOINT_DIR}-{state.global_step}")
             fix_valuehead_checkpoint(
                 model=kwargs.pop("model"), output_dir=output_dir, safe_serialization=args.save_safetensors
             )
@@ -123,7 +123,7 @@ class SaveProcessorCallback(TrainerCallback):
     @override
     def on_save(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
         if args.should_save:
-            output_dir = os.path.join(args.output_dir, "{}-{}".format(PREFIX_CHECKPOINT_DIR, state.global_step))
+            output_dir = os.path.join(args.output_dir, f"{PREFIX_CHECKPOINT_DIR}-{state.global_step}")
             getattr(self.processor, "image_processor").save_pretrained(output_dir)
 
     @override
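Both callbacks above rebuild checkpoint paths the way the transformers Trainer does. PREFIX_CHECKPOINT_DIR is the literal string "checkpoint", so the f-string yields directories like checkpoint-500; a small sketch with illustrative values:

import os

from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR  # the string "checkpoint"

output_dir, global_step = "saves/llama3-8b/lora", 500  # hypothetical values
print(os.path.join(output_dir, f"{PREFIX_CHECKPOINT_DIR}-{global_step}"))
# -> saves/llama3-8b/lora/checkpoint-500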
@@ -145,7 +145,7 @@ class PissaConvertCallback(TrainerCallback):
         if args.should_save:
             model = kwargs.pop("model")
             pissa_init_dir = os.path.join(args.output_dir, "pissa_init")
-            logger.info("Initial PiSSA adapter will be saved at: {}.".format(pissa_init_dir))
+            logger.info(f"Initial PiSSA adapter will be saved at: {pissa_init_dir}.")
             if isinstance(model, PeftModel):
                 init_lora_weights = getattr(model.peft_config["default"], "init_lora_weights")
                 setattr(model.peft_config["default"], "init_lora_weights", True)
@@ -159,7 +159,7 @@ class PissaConvertCallback(TrainerCallback):
             pissa_init_dir = os.path.join(args.output_dir, "pissa_init")
             pissa_backup_dir = os.path.join(args.output_dir, "pissa_backup")
             pissa_convert_dir = os.path.join(args.output_dir, "pissa_converted")
-            logger.info("Converted PiSSA adapter will be saved at: {}.".format(pissa_convert_dir))
+            logger.info(f"Converted PiSSA adapter will be saved at: {pissa_convert_dir}.")
             # 1. save a pissa backup with init_lora_weights: True
             # 2. save a converted lora with init_lora_weights: pissa
             # 3. load the pissa backup with init_lora_weights: True
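The three numbered comments are the heart of the PiSSA conversion: a backup adapter is saved with plain-LoRA initialization, a converted adapter is saved with PiSSA weight conversion, and the backup is reloaded so training continues unperturbed. A rough sketch of that sequence against the peft API (simplified, assuming a PeftModel with a "default" adapter; not a drop-in replacement for the callback):

from peft import PeftModel

def convert_pissa_sketch(model: "PeftModel", init_dir: str, backup_dir: str, convert_dir: str) -> None:
    # 1. save a pissa backup with init_lora_weights: True
    setattr(model.peft_config["default"], "init_lora_weights", True)
    model.save_pretrained(backup_dir)
    # 2. save a converted lora with init_lora_weights: pissa
    setattr(model.peft_config["default"], "init_lora_weights", "pissa")
    model.save_pretrained(convert_dir, path_initial_model_for_weight_conversion=init_dir)
    # 3. load the pissa backup with init_lora_weights: True
    setattr(model.peft_config["default"], "init_lora_weights", True)
    model.load_adapter(backup_dir, "default", is_trainable=True)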
@@ -156,7 +156,7 @@ class CustomDPOTrainer(DPOTrainer):
         elif self.loss_type == "simpo":
             losses = self.simpo_loss(policy_chosen_logps, policy_rejected_logps)
         else:
-            raise NotImplementedError("Unknown loss type: {}.".format(self.loss_type))
+            raise NotImplementedError(f"Unknown loss type: {self.loss_type}.")
 
         chosen_rewards = self.beta * policy_chosen_logps.to(self.accelerator.device).detach()
         rejected_rewards = self.beta * policy_rejected_logps.to(self.accelerator.device).detach()
@@ -245,16 +245,16 @@ class CustomDPOTrainer(DPOTrainer):
         reward_accuracies = (chosen_rewards > rejected_rewards).float()
 
         prefix = "eval_" if train_eval == "eval" else ""
-        metrics["{}rewards/chosen".format(prefix)] = chosen_rewards.mean().cpu()
-        metrics["{}rewards/rejected".format(prefix)] = rejected_rewards.mean().cpu()
-        metrics["{}rewards/accuracies".format(prefix)] = reward_accuracies.mean().cpu()
-        metrics["{}rewards/margins".format(prefix)] = (chosen_rewards - rejected_rewards).mean().cpu()
-        metrics["{}logps/rejected".format(prefix)] = policy_rejected_logps.detach().mean().cpu()
-        metrics["{}logps/chosen".format(prefix)] = policy_chosen_logps.detach().mean().cpu()
-        metrics["{}logits/rejected".format(prefix)] = policy_rejected_logits.detach().mean().cpu()
-        metrics["{}logits/chosen".format(prefix)] = policy_chosen_logits.detach().mean().cpu()
+        metrics[f"{prefix}rewards/chosen"] = chosen_rewards.mean().cpu()
+        metrics[f"{prefix}rewards/rejected"] = rejected_rewards.mean().cpu()
+        metrics[f"{prefix}rewards/accuracies"] = reward_accuracies.mean().cpu()
+        metrics[f"{prefix}rewards/margins"] = (chosen_rewards - rejected_rewards).mean().cpu()
+        metrics[f"{prefix}logps/rejected"] = policy_rejected_logps.detach().mean().cpu()
+        metrics[f"{prefix}logps/chosen"] = policy_chosen_logps.detach().mean().cpu()
+        metrics[f"{prefix}logits/rejected"] = policy_rejected_logits.detach().mean().cpu()
+        metrics[f"{prefix}logits/chosen"] = policy_chosen_logits.detach().mean().cpu()
         if self.loss_type == "orpo":
-            metrics["{}sft_loss".format(prefix)] = sft_loss.detach().mean().cpu()
-            metrics["{}odds_ratio_loss".format(prefix)] = ((losses - sft_loss) / self.beta).detach().mean().cpu()
+            metrics[f"{prefix}sft_loss"] = sft_loss.detach().mean().cpu()
+            metrics[f"{prefix}odds_ratio_loss"] = ((losses - sft_loss) / self.beta).detach().mean().cpu()
 
         return losses.mean(), metrics
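All eight metric keys differ only in the computed prefix, so the f-string form reads more directly than the .format chain. A toy illustration of the key scheme (placeholder floats in place of tensors):

train_eval = "eval"  # or "train"
prefix = "eval_" if train_eval == "eval" else ""
metrics = {f"{prefix}rewards/chosen": 0.42, f"{prefix}rewards/rejected": -0.17}  # dummy numbers
assert "eval_rewards/chosen" in metrics  # evaluation metrics get the eval_ prefix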
@@ -129,11 +129,11 @@ class CustomKTOTrainer(KTOTrainer):
         """
         batch = {k: v.detach().clone() for k, v in batch.items()}  # avoid error
         model_inputs = {
-            "input_ids": batch["{}input_ids".format(prefix)],
-            "attention_mask": batch["{}attention_mask".format(prefix)],
+            "input_ids": batch[f"{prefix}input_ids"],
+            "attention_mask": batch[f"{prefix}attention_mask"],
         }
-        if "{}token_type_ids".format(prefix) in batch:
-            model_inputs["token_type_ids"] = batch["{}token_type_ids".format(prefix)]
+        if f"{prefix}token_type_ids" in batch:
+            model_inputs["token_type_ids"] = batch[f"{prefix}token_type_ids"]
 
         if "pixel_values" in batch:
             model_inputs["pixel_values"] = batch["pixel_values"]
@@ -142,7 +142,7 @@ class CustomKTOTrainer(KTOTrainer):
             model_inputs["image_grid_thw"] = batch["image_grid_thw"]
 
         logits = model(**model_inputs, return_dict=True, use_cache=False).logits.to(torch.float32)
-        logps, valid_length = get_batch_logps(logits=logits, labels=batch["{}labels".format(prefix)])
+        logps, valid_length = get_batch_logps(logits=logits, labels=batch[f"{prefix}labels"])
         return logps, logps / valid_length
 
     @override
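Here prefix selects between the two sequence sets packed into a KTO batch, the target keys and the "kl_"-prefixed reference keys, so one forward pass serves both. A toy sketch of the lookup, with lists standing in for tensors:

batch = {"input_ids": [1, 2], "attention_mask": [1, 1], "kl_input_ids": [3, 4], "kl_attention_mask": [1, 1]}
for prefix in ("", "kl_"):
    model_inputs = {
        "input_ids": batch[f"{prefix}input_ids"],            # "input_ids" or "kl_input_ids"
        "attention_mask": batch[f"{prefix}attention_mask"],  # likewise for the mask
    }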
@@ -62,8 +62,8 @@ def replace_model(model: "AutoModelForCausalLMWithValueHead", target: Literal["default", "reward"]) -> None:
             setattr(model, "default_head_bias", v_head_layer.bias.data.detach().clone())
 
         device = v_head_layer.weight.device
-        v_head_layer.weight.data = model.get_buffer("{}_head_weight".format(target)).detach().clone().to(device)
-        v_head_layer.bias.data = model.get_buffer("{}_head_bias".format(target)).detach().clone().to(device)
+        v_head_layer.weight.data = model.get_buffer(f"{target}_head_weight").detach().clone().to(device)
+        v_head_layer.bias.data = model.get_buffer(f"{target}_head_bias").detach().clone().to(device)
 
 
 def dump_layernorm(model: "PreTrainedModel") -> Dict[str, "torch.Tensor"]:
@@ -218,18 +218,18 @@ class CustomPPOTrainer(PPOTrainer, Trainer):
 
         if self.is_world_process_zero():
             logger.info("***** Running training *****")
-            logger.info("  Num examples = {:,}".format(num_examples))
-            logger.info("  Num Epochs = {:,}".format(num_train_epochs))
-            logger.info("  Instantaneous batch size per device = {:,}".format(self.args.per_device_train_batch_size))
+            logger.info(f"  Num examples = {num_examples:,}")
+            logger.info(f"  Num Epochs = {num_train_epochs:,}")
+            logger.info(f"  Instantaneous batch size per device = {self.args.per_device_train_batch_size:,}")
             logger.info(
                 "  Total train batch size (w. parallel, buffer, distributed & accumulation) = {:,}".format(
                     total_train_batch_size
                 )
             )
-            logger.info("  Gradient Accumulation steps = {:,}".format(self.args.gradient_accumulation_steps))
-            logger.info("  Num optimization epochs per batch = {:,}".format(self.finetuning_args.ppo_epochs))
-            logger.info("  Total training steps = {:,}".format(max_steps))
-            logger.info("  Number of trainable parameters = {:,}".format(count_parameters(self.model)[0]))
+            logger.info(f"  Gradient Accumulation steps = {self.args.gradient_accumulation_steps:,}")
+            logger.info(f"  Num optimization epochs per batch = {self.finetuning_args.ppo_epochs:,}")
+            logger.info(f"  Total training steps = {max_steps:,}")
+            logger.info(f"  Number of trainable parameters = {count_parameters(self.model)[0]:,}")
 
         dataiter = iter(self.dataloader)
         loss_meter = AverageMeter()
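Note that format specs carry over unchanged: the spec in "{:,}".format(n) simply moves after the colon in f"{n:,}". For instance:

num_examples = 51200  # illustrative count
assert "  Num examples = {:,}".format(num_examples) == f"  Num examples = {num_examples:,}"
# both render "  Num examples = 51,200"; the "{:.2f}" spec in the LoRA+ hunk below migrates the same way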
@@ -290,7 +290,7 @@ class CustomPPOTrainer(PPOTrainer, Trainer):
 
             if (step + 1) % self.args.save_steps == 0:  # save checkpoint
                 self.save_model(
-                    os.path.join(self.args.output_dir, "{}-{}".format(PREFIX_CHECKPOINT_DIR, self.state.global_step))
+                    os.path.join(self.args.output_dir, f"{PREFIX_CHECKPOINT_DIR}-{self.state.global_step}")
                 )
                 self.callback_handler.on_save(self.args, self.state, self.control)
 
@@ -116,7 +116,7 @@ def create_ref_model(
         ref_model = load_model(
             tokenizer, ref_model_args, ref_finetuning_args, is_trainable=False, add_valuehead=add_valuehead
         )
-        logger.info("Created reference model from {}".format(finetuning_args.ref_model))
+        logger.info(f"Created reference model from {finetuning_args.ref_model}")
     else:
         if finetuning_args.finetuning_type == "lora":
             ref_model = None
@@ -140,7 +140,7 @@ def create_reward_model(
     """
    if finetuning_args.reward_model_type == "api":
        assert finetuning_args.reward_model.startswith("http"), "Please provide full url."
-        logger.info("Use reward server {}".format(finetuning_args.reward_model))
+        logger.info(f"Use reward server {finetuning_args.reward_model}")
        return finetuning_args.reward_model
    elif finetuning_args.reward_model_type == "lora":
        model.pretrained_model.load_adapter(finetuning_args.reward_model, "reward")
@@ -157,7 +157,7 @@ def create_reward_model(
        model.register_buffer(
            "default_head_bias", torch.zeros_like(vhead_params["v_head.summary.bias"]), persistent=False
        )
-        logger.info("Loaded adapter weights of reward model from {}".format(finetuning_args.reward_model))
+        logger.info(f"Loaded adapter weights of reward model from {finetuning_args.reward_model}")
        return None
    else:
        reward_model_args = ModelArguments.copyfrom(
@@ -171,7 +171,7 @@ def create_reward_model(
        reward_model = load_model(
            tokenizer, reward_model_args, reward_finetuning_args, is_trainable=False, add_valuehead=True
        )
-        logger.info("Loaded full weights of reward model from {}".format(finetuning_args.reward_model))
+        logger.info(f"Loaded full weights of reward model from {finetuning_args.reward_model}")
        logger.warning("Please ensure the ppo model and reward model share SAME tokenizer and vocabulary.")
        return reward_model
 
@@ -231,7 +231,7 @@ def _create_galore_optimizer(
    elif training_args.optim == "adafactor":
        optim_class = GaLoreAdafactor
    else:
-        raise NotImplementedError("Unknow optim: {}".format(training_args.optim))
+        raise NotImplementedError(f"Unknow optim: {training_args.optim}")
 
    if finetuning_args.galore_layerwise:
        if training_args.gradient_accumulation_steps != 1:
@@ -305,7 +305,7 @@ def _create_loraplus_optimizer(
        dict(params=param_dict["embedding"], lr=embedding_lr, weight_decay=training_args.weight_decay),
    ]
    optimizer = optim_class(param_groups, **optim_kwargs)
-    logger.info("Using LoRA+ optimizer with loraplus lr ratio {:.2f}.".format(finetuning_args.loraplus_lr_ratio))
+    logger.info(f"Using LoRA+ optimizer with loraplus lr ratio {finetuning_args.loraplus_lr_ratio:.2f}.")
    return optimizer
 
 
@@ -57,7 +57,7 @@ def run_exp(args: Optional[Dict[str, Any]] = None, callbacks: List["TrainerCallback"] = []) -> None:
    elif finetuning_args.stage == "kto":
        run_kto(model_args, data_args, training_args, finetuning_args, callbacks)
    else:
-        raise ValueError("Unknown task: {}.".format(finetuning_args.stage))
+        raise ValueError(f"Unknown task: {finetuning_args.stage}.")
 
 
 def export_model(args: Optional[Dict[str, Any]] = None) -> None:
@@ -91,18 +91,18 @@ def export_model(args: Optional[Dict[str, Any]] = None) -> None:
 
    setattr(model.config, "torch_dtype", output_dtype)
    model = model.to(output_dtype)
-    logger.info("Convert model dtype to: {}.".format(output_dtype))
+    logger.info(f"Convert model dtype to: {output_dtype}.")
 
    model.save_pretrained(
        save_directory=model_args.export_dir,
-        max_shard_size="{}GB".format(model_args.export_size),
+        max_shard_size=f"{model_args.export_size}GB",
        safe_serialization=(not model_args.export_legacy_format),
    )
    if model_args.export_hub_model_id is not None:
        model.push_to_hub(
            model_args.export_hub_model_id,
            token=model_args.hf_hub_token,
-            max_shard_size="{}GB".format(model_args.export_size),
+            max_shard_size=f"{model_args.export_size}GB",
            safe_serialization=(not model_args.export_legacy_format),
        )
 
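export_size is an integer count of gigabytes, so the f-string produces the "5GB"-style value save_pretrained expects for max_shard_size. A minimal sketch with a small public checkpoint (model name and export settings are illustrative, not from the repo):

import torch
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("gpt2")
output_dtype, export_size = torch.bfloat16, 5       # hypothetical export settings
setattr(model.config, "torch_dtype", output_dtype)  # keep the saved config consistent
model = model.to(output_dtype)                      # cast weights before sharded export
model.save_pretrained("exported-gpt2", max_shard_size=f"{export_size}GB", safe_serialization=True)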
@@ -117,13 +117,13 @@ def export_model(args: Optional[Dict[str, Any]] = None) -> None:
            os.path.join(vhead_path, V_HEAD_SAFE_WEIGHTS_NAME),
            os.path.join(model_args.export_dir, V_HEAD_SAFE_WEIGHTS_NAME),
        )
-        logger.info("Copied valuehead to {}.".format(model_args.export_dir))
+        logger.info(f"Copied valuehead to {model_args.export_dir}.")
    elif os.path.exists(os.path.join(vhead_path, V_HEAD_WEIGHTS_NAME)):
        shutil.copy(
            os.path.join(vhead_path, V_HEAD_WEIGHTS_NAME),
            os.path.join(model_args.export_dir, V_HEAD_WEIGHTS_NAME),
        )
-        logger.info("Copied valuehead to {}.".format(model_args.export_dir))
+        logger.info(f"Copied valuehead to {model_args.export_dir}.")
 
    try:
        tokenizer.padding_side = "left"  # restore padding side
@@ -140,4 +140,4 @@ def export_model(args: Optional[Dict[str, Any]] = None) -> None:
        )
 
    except Exception as e:
-        logger.warning("Cannot save tokenizer, please copy the files manually: {}.".format(e))
+        logger.warning(f"Cannot save tokenizer, please copy the files manually: {e}.")