From 632fff02e08c03a85207c6d60a6da17d12709a2b Mon Sep 17 00:00:00 2001
From: hiyouga
Date: Thu, 14 Sep 2023 17:56:58 +0800
Subject: [PATCH] fix #887

Former-commit-id: 8857e4560219c4052bdb7c7dc1a014a5f5fd0163
---
 src/llmtuner/chat/stream_chat.py  | 1 +
 src/llmtuner/tuner/core/loader.py | 1 -
 src/llmtuner/tuner/ppo/utils.py   | 4 ++--
 src/llmtuner/tuner/tune.py        | 1 +
 4 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/llmtuner/chat/stream_chat.py b/src/llmtuner/chat/stream_chat.py
index c6dfe30e..af785dd3 100644
--- a/src/llmtuner/chat/stream_chat.py
+++ b/src/llmtuner/chat/stream_chat.py
@@ -13,6 +13,7 @@ class ChatModel:
     def __init__(self, args: Optional[Dict[str, Any]] = None) -> None:
         model_args, data_args, finetuning_args, self.generating_args = get_infer_args(args)
         self.model, self.tokenizer = load_model_and_tokenizer(model_args, finetuning_args)
+        self.tokenizer.padding_side = "left"
         self.model = dispatch_model(self.model)
         self.template = get_template_and_fix_tokenizer(data_args.template, self.tokenizer)
         self.system_prompt = data_args.system_prompt
diff --git a/src/llmtuner/tuner/core/loader.py b/src/llmtuner/tuner/core/loader.py
index 95c1eee9..ef759140 100644
--- a/src/llmtuner/tuner/core/loader.py
+++ b/src/llmtuner/tuner/core/loader.py
@@ -103,7 +103,6 @@ def load_model_and_tokenizer(
 
         elif hasattr(config, "rope_scaling"): # for LLaMA and Falcon models
             require_version("transformers>=4.31.0", "RoPE scaling requires transformers>=4.31.0")
-
             if is_trainable:
                 if model_args.rope_scaling == "dynamic":
                     assert not model_args.flash_attn, "Flash attention does not support dynamic rope scaling."
diff --git a/src/llmtuner/tuner/ppo/utils.py b/src/llmtuner/tuner/ppo/utils.py
index 7c4ac997..2257eead 100644
--- a/src/llmtuner/tuner/ppo/utils.py
+++ b/src/llmtuner/tuner/ppo/utils.py
@@ -10,8 +10,8 @@ if TYPE_CHECKING:
 def replace_model(model: "AutoModelForCausalLMWithValueHead", target: Literal["default", "reward"]) -> None:
     if target == "reward": # save default head temporarily
         valuehead_state_dict = model.v_head.state_dict()
-        setattr(model, "default_head_weight", valuehead_state_dict["summary.weight"].clone())
-        setattr(model, "default_head_bias", valuehead_state_dict["summary.bias"].clone())
+        setattr(model, "default_head_weight", valuehead_state_dict["summary.weight"].detach().clone())
+        setattr(model, "default_head_bias", valuehead_state_dict["summary.bias"].detach().clone())
 
     model.pretrained_model.set_adapter(target) # set the LoRA adapter to be active
     model.v_head.load_state_dict({
diff --git a/src/llmtuner/tuner/tune.py b/src/llmtuner/tuner/tune.py
index a4a4c2a1..356122cf 100644
--- a/src/llmtuner/tuner/tune.py
+++ b/src/llmtuner/tuner/tune.py
@@ -37,6 +37,7 @@ def run_exp(args: Optional[Dict[str, Any]] = None, callbacks: Optional[List["Tra
 def export_model(args: Optional[Dict[str, Any]] = None, max_shard_size: Optional[str] = "10GB"):
     model_args, _, training_args, finetuning_args, _, _ = get_train_args(args)
     model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args)
+    tokenizer.padding_side = "left" # restore padding side
    model.save_pretrained(training_args.output_dir, max_shard_size=max_shard_size)
     try:
         tokenizer.save_pretrained(training_args.output_dir)
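
Note on the fix: the patch forces padding_side = "left" on the tokenizer used for chat inference and export, because a decoder-only model continues generation from the last position of each sequence; with right padding, pad tokens would sit between the prompt and the generated continuation in a batch. The ppo/utils.py change is separate: .detach().clone() makes the cached value-head tensors plain copies outside the autograd graph before the adapter is swapped. Below is a minimal sketch (not part of the patch) illustrating the left-padding behavior; "gpt2" is only a stand-in for whatever base model load_model_and_tokenizer would return.

# Minimal sketch, assuming a generic decoder-only model from Hugging Face transformers.
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

tokenizer.pad_token = tokenizer.eos_token   # gpt2 has no pad token by default
tokenizer.padding_side = "left"             # same setting the patch applies in stream_chat.py / tune.py

# Batch two prompts of different lengths; left padding keeps the real tokens
# flush against the generation boundary.
batch = tokenizer(
    ["Hello there", "A much longer prompt that needs no padding"],
    return_tensors="pt",
    padding=True,
)
outputs = model.generate(**batch, max_new_tokens=16, pad_token_id=tokenizer.eos_token_id)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))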