mirror of https://github.com/hiyouga/LLaMA-Factory.git (synced 2025-08-04 04:32:50 +08:00)
parent: fe2ac71d11
commit: 632fff02e0
@@ -13,6 +13,7 @@ class ChatModel:
     def __init__(self, args: Optional[Dict[str, Any]] = None) -> None:
         model_args, data_args, finetuning_args, self.generating_args = get_infer_args(args)
         self.model, self.tokenizer = load_model_and_tokenizer(model_args, finetuning_args)
+        self.tokenizer.padding_side = "left"
         self.model = dispatch_model(self.model)
         self.template = get_template_and_fix_tokenizer(data_args.template, self.tokenizer)
         self.system_prompt = data_args.system_prompt
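
The added self.tokenizer.padding_side = "left" matters because decoder-only models continue generating from the last position of each row: with right padding, the shorter prompts in a batch end in pad tokens and generation continues from padding rather than from the prompt. A minimal sketch of the effect, assuming a generic GPT-2 checkpoint (the model name and prompts are illustrative, not part of this commit):

# Minimal sketch: why left padding is used for batched generation with a
# decoder-only model. gpt2 and the prompts are placeholders.
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token     # gpt2 ships without a pad token
tokenizer.padding_side = "left"               # the setting this commit applies in ChatModel

model = AutoModelForCausalLM.from_pretrained("gpt2")
batch = tokenizer(["Hi", "A much longer prompt than the first"], return_tensors="pt", padding=True)

# With left padding every row ends on a real token, so generate() extends the
# prompt itself rather than a trailing run of pad tokens.
outputs = model.generate(**batch, max_new_tokens=8, pad_token_id=tokenizer.pad_token_id)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))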
@@ -103,7 +103,6 @@ def load_model_and_tokenizer(
     elif hasattr(config, "rope_scaling"): # for LLaMA and Falcon models
         require_version("transformers>=4.31.0", "RoPE scaling requires transformers>=4.31.0")

         if is_trainable:
             if model_args.rope_scaling == "dynamic":
                 assert not model_args.flash_attn, "Flash attention does not support dynamic rope scaling."
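
For context on the guarded branch: from transformers 4.31 onward, RoPE scaling is driven by the rope_scaling field on a LLaMA-style config, and the assert rejects the "dynamic" variant during training whenever flash attention is enabled, since the commit treats the two patches as incompatible. A rough illustration of that config field, assuming default LLaMA hyperparameters (the scaling factor is made up):

# Rough sketch of the rope_scaling config field checked by the elif branch.
# LlamaConfig() stands in for the loaded checkpoint config; 2.0 is arbitrary.
from transformers import LlamaConfig

config = LlamaConfig()
print(hasattr(config, "rope_scaling"))   # True, which is what the elif branch tests

# "linear" stretches positions by a fixed factor; "dynamic" rescales NTK-style
# as sequences grow, the case the assert above disallows with flash attention.
config.rope_scaling = {"type": "dynamic", "factor": 2.0}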
@@ -10,8 +10,8 @@ if TYPE_CHECKING:
 def replace_model(model: "AutoModelForCausalLMWithValueHead", target: Literal["default", "reward"]) -> None:
     if target == "reward": # save default head temporarily
         valuehead_state_dict = model.v_head.state_dict()
-        setattr(model, "default_head_weight", valuehead_state_dict["summary.weight"].clone())
-        setattr(model, "default_head_bias", valuehead_state_dict["summary.bias"].clone())
+        setattr(model, "default_head_weight", valuehead_state_dict["summary.weight"].detach().clone())
+        setattr(model, "default_head_bias", valuehead_state_dict["summary.bias"].detach().clone())

     model.pretrained_model.set_adapter(target) # set the LoRA adapter to be active
     model.v_head.load_state_dict({
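
The switch from .clone() to .detach().clone() is about autograd: clone() is a differentiable op, so the stashed value-head weights would remain connected to the live graph, while detach().clone() snapshots them as independent tensors. A small self-contained check of the difference (plain PyTorch, not LLaMA-Factory code):

# clone() keeps the copy on the source tensor's autograd graph;
# detach().clone() produces a free-standing snapshot.
import torch

w = torch.randn(3, requires_grad=True)

attached = w.clone()             # gradients flowing into this copy reach w
snapshot = w.detach().clone()    # independent buffer, no grad history

print(attached.requires_grad)    # True
print(snapshot.requires_grad)    # False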
@@ -37,6 +37,7 @@ def run_exp(args: Optional[Dict[str, Any]] = None, callbacks: Optional[List["Tra
 def export_model(args: Optional[Dict[str, Any]] = None, max_shard_size: Optional[str] = "10GB"):
     model_args, _, training_args, finetuning_args, _, _ = get_train_args(args)
     model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args)
+    tokenizer.padding_side = "left" # restore padding side
     model.save_pretrained(training_args.output_dir, max_shard_size=max_shard_size)
     try:
         tokenizer.save_pretrained(training_args.output_dir)
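
After export_model runs, the directory given as output_dir holds the model shards and the tokenizer files, with the padding side set to "left" before saving. A sketch of consuming the exported artifacts; the directory path is a placeholder, and the explicit padding_side line is defensive in case the saved tokenizer config does not carry the setting:

# Sketch of loading what export_model wrote; "path/to/output_dir" is a
# placeholder for the output_dir value passed to export_model.
from transformers import AutoModelForCausalLM, AutoTokenizer

export_dir = "path/to/output_dir"
tokenizer = AutoTokenizer.from_pretrained(export_dir)
model = AutoModelForCausalLM.from_pretrained(export_dir)

# Defensive: re-apply left padding before batched generation, mirroring
# what ChatModel.__init__ does in the first hunk above.
tokenizer.padding_side = "left"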