From ae1048db6d55a039a2fc45c7e16c6ddc674987df Mon Sep 17 00:00:00 2001
From: hiyouga
Date: Tue, 28 Nov 2023 20:52:28 +0800
Subject: [PATCH] fix #1659

Former-commit-id: 475a3fa0f4c09d4cfd55ec66271a6d3c9eb5f4d2
---
 src/llmtuner/extras/constants.py        | 9 ++++++---
 src/llmtuner/hparams/finetuning_args.py | 2 +-
 src/llmtuner/model/loader.py            | 3 +++
 src/llmtuner/train/tuner.py             | 4 ++--
 4 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/src/llmtuner/extras/constants.py b/src/llmtuner/extras/constants.py
index 81040d64..3257678e 100644
--- a/src/llmtuner/extras/constants.py
+++ b/src/llmtuner/extras/constants.py
@@ -217,7 +217,11 @@ register_model_group(
         "Qwen-7B": "Qwen/Qwen-7B",
         "Qwen-14B": "Qwen/Qwen-14B",
         "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat",
-        "Qwen-14B-Chat": "Qwen/Qwen-14B-Chat"
+        "Qwen-14B-Chat": "Qwen/Qwen-14B-Chat",
+        "Qwen-7B-int8-Chat": "Qwen/Qwen-7B-Chat-Int8",
+        "Qwen-7B-int4-Chat": "Qwen/Qwen-7B-Chat-Int4",
+        "Qwen-14B-int8-Chat": "Qwen/Qwen-14B-Chat-Int8",
+        "Qwen-14B-int4-Chat": "Qwen/Qwen-14B-Chat-Int4"
     },
     module="c_attn",
     template="qwen"
@@ -266,8 +270,7 @@ register_model_group(
         "Yi-6B": "01-ai/Yi-6B",
         "Yi-34B": "01-ai/Yi-34B",
         "Yi-34B-Chat": "01-ai/Yi-34B-Chat",
-        "Yi-34B-int8-Chat": "01-ai/Yi-34B-Chat-8bits",
-        "Yi-34B-int4-Chat": "01-ai/Yi-34B-Chat-4bits"
+        "Yi-34B-int8-Chat": "01-ai/Yi-34B-Chat-8bits"
     },
     template="yi"
 )
diff --git a/src/llmtuner/hparams/finetuning_args.py b/src/llmtuner/hparams/finetuning_args.py
index e4c4a37b..cf60676a 100644
--- a/src/llmtuner/hparams/finetuning_args.py
+++ b/src/llmtuner/hparams/finetuning_args.py
@@ -179,7 +179,7 @@ class FinetuningArguments(FreezeArguments, LoraArguments, RLHFArguments):
             raise ValueError("Reward model is necessary for PPO training.")
 
         if self.stage == "ppo" and self.reward_model_type == "lora" and self.finetuning_type != "lora":
-            raise ValueError("Lora reward model only supports lora training.")
+            raise ValueError("Freeze/Full PPO training needs `reward_model_type=full`.")
 
     def save_to_json(self, json_path: str):
         r"""Saves the content of this instance in JSON format inside `json_path`."""
diff --git a/src/llmtuner/model/loader.py b/src/llmtuner/model/loader.py
index 83f19173..5561837a 100644
--- a/src/llmtuner/model/loader.py
+++ b/src/llmtuner/model/loader.py
@@ -146,6 +146,9 @@ def load_model_and_tokenizer(
 
     # Quantization configurations (using bitsandbytes library)
     if model_args.quantization_bit is not None:
+        if getattr(config, "quantization_config", None):
+            raise ValueError("Remove `quantization_bit` if you are using a quantized model.")
+
         if is_deepspeed_zero3_enabled():
             raise ValueError("DeepSpeed ZeRO-3 is incompatible with quantization.")
 
diff --git a/src/llmtuner/train/tuner.py b/src/llmtuner/train/tuner.py
index a6e8b2d1..094aa50f 100644
--- a/src/llmtuner/train/tuner.py
+++ b/src/llmtuner/train/tuner.py
@@ -38,8 +38,8 @@ def export_model(args: Optional[Dict[str, Any]] = None):
     model_args, _, finetuning_args, _ = get_infer_args(args)
     model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args)
 
-    if getattr(model, "quantization_method", None) == "gptq":
-        raise ValueError("Cannot export a GPTQ quantized model.")
+    if getattr(model, "quantization_method", None) in ["gptq", "awq"]:
+        raise ValueError("Cannot export a GPTQ or AWQ quantized model.")
 
     model.config.use_cache = True
     model.save_pretrained(finetuning_args.export_dir, max_shard_size="{}GB".format(finetuning_args.export_size))
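
Note on the loader.py change, as a minimal standalone sketch (not part of the
patch): pre-quantized GPTQ/AWQ checkpoints such as the newly registered
Qwen/Qwen-14B-Chat-Int4 ship a `quantization_config` entry in their
config.json, which transformers exposes as a config attribute, so the new
guard rejects stacking bitsandbytes quantization on top of them. The
`quantization_bit` variable below stands in for `model_args.quantization_bit`:

    # Illustrative reproduction of the guard added in loader.py.
    from transformers import AutoConfig

    config = AutoConfig.from_pretrained("Qwen/Qwen-14B-Chat-Int4", trust_remote_code=True)
    quantization_bit = 4  # stands in for model_args.quantization_bit

    if quantization_bit is not None and getattr(config, "quantization_config", None):
        # The checkpoint is already quantized (GPTQ/AWQ), so refuse to
        # re-quantize it with bitsandbytes.
        raise ValueError("Remove `quantization_bit` if you are using a quantized model.")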
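
Likewise for the tuner.py change: transformers records the quantization
backend on the loaded model, which is what `export_model` inspects. A sketch
of the widened check, factored into a hypothetical helper for illustration:

    # Sketch of the export guard; `model` is any loaded PreTrainedModel.
    def assert_exportable(model) -> None:
        # Both GPTQ- and AWQ-quantized weights cannot be exported as plain
        # checkpoints, so the check now refuses both backends.
        if getattr(model, "quantization_method", None) in ["gptq", "awq"]:
            raise ValueError("Cannot export a GPTQ or AWQ quantized model.")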
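
And the reworded PPO check from finetuning_args.py in isolation (the inline
variables below stand in for the FinetuningArguments fields of the same name):

    # Illustrative excerpt: a LoRA reward model can only be stacked on a
    # LoRA-tuned policy, so freeze/full PPO runs need a full reward model.
    stage, reward_model_type, finetuning_type = "ppo", "lora", "freeze"

    if stage == "ppo" and reward_model_type == "lora" and finetuning_type != "lora":
        raise ValueError("Freeze/Full PPO training needs `reward_model_type=full`.")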