From 15b321da8e3847922a0f7276304efae7c8fbf6c5 Mon Sep 17 00:00:00 2001
From: hiyouga
Date: Wed, 13 Dec 2023 01:53:46 +0800
Subject: [PATCH] remove loftq

Former-commit-id: 3a8a50d4d42082b3bdce549653b398e49f2eb554
---
 README.md                               |  2 --
 README_zh.md                            |  2 --
 src/llmtuner/hparams/finetuning_args.py |  4 ----
 src/llmtuner/model/adapter.py           | 13 +----------
 src/llmtuner/model/loader.py            | 30 +++++++++++--------------
 5 files changed, 14 insertions(+), 37 deletions(-)

diff --git a/README.md b/README.md
index bb23ba36..2850ac2a 100644
--- a/README.md
+++ b/README.md
@@ -55,8 +55,6 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 
 ## Changelog
 
-[23/12/12] We supported **[LoftQ](https://arxiv.org/abs/2310.08659)** training. Try `loftq_init` argument with [a patched peft](https://github.com/hiyouga/peft) for better QLoRA performance.
-
 [23/12/12] We supported fine-tuning the latest MoE model **[Mixtral 8x7B](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1)** in our framework. See hardware requirement [here](#hardware-requirement).
 
 [23/12/01] We supported downloading pre-trained models and datasets from the **[ModelScope Hub](https://modelscope.cn/models)** for Chinese mainland users. See [this tutorial](#use-modelscope-hub-optional) for usage.
diff --git a/README_zh.md b/README_zh.md
index ae5d5106..87c9bab8 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -55,8 +55,6 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/6ba60acc-e2e2-4bec-b846
 
 ## 更新日志
 
-[23/12/12] 我们支持了 **[LoftQ](https://arxiv.org/abs/2310.08659)** 训练。请使用 `loftq_init` 参数以及[修复后的 peft](https://github.com/hiyouga/peft) 以提升 QLoRA 性能。
-
 [23/12/12] 我们支持了微调最新的混合专家模型 **[Mixtral 8x7B](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1)**。硬件需求请查阅[此处](#硬件依赖)。
 
 [23/12/01] 我们支持了从 **[魔搭社区](https://modelscope.cn/models)** 下载预训练模型和数据集。详细用法请参照 [此教程](#使用魔搭社区可跳过)。
diff --git a/src/llmtuner/hparams/finetuning_args.py b/src/llmtuner/hparams/finetuning_args.py
index 87f8f8a4..ae3a6f79 100644
--- a/src/llmtuner/hparams/finetuning_args.py
+++ b/src/llmtuner/hparams/finetuning_args.py
@@ -55,10 +55,6 @@ class LoraArguments:
                   Phi-1.5 choices: [\"Wqkv\", \"out_proj\", \"fc1\", \"fc2\"], \
                   Others choices: the same as LLaMA."}
     )
-    loftq_init: Optional[bool] = field(
-        default=False,
-        metadata={"help": "Use LoftQ initialization for quantized LoRA fine-tuning."}
-    )
     resume_lora_training: Optional[bool] = field(
         default=True,
         metadata={"help": "Whether to resume training from the last LoRA weights or create new weights after merging them."}
diff --git a/src/llmtuner/model/adapter.py b/src/llmtuner/model/adapter.py
index 9404e707..72cea444 100644
--- a/src/llmtuner/model/adapter.py
+++ b/src/llmtuner/model/adapter.py
@@ -91,16 +91,6 @@ def init_adapter(
         else:
             target_modules = finetuning_args.lora_target
 
-        config_kwargs = {}
-        if model_args.quantization_bit is not None and finetuning_args.loftq_init:
-            if model_args.quantization_bit != 4:
-                raise ValueError("LoftQ initialization only support 4-bit quantized training.")
-
-            from peft import LoftQConfig # type: ignore
-            loftq_config = LoftQConfig(loftq_bits=4)
-            config_kwargs["init_lora_weights"] = "loftq"
-            config_kwargs["loftq_config"] = loftq_config
-
         lora_config = LoraConfig(
             task_type=TaskType.CAUSAL_LM,
             inference_mode=False,
@@ -108,8 +98,7 @@ def init_adapter(
             r=finetuning_args.lora_rank,
             lora_alpha=finetuning_args.lora_alpha,
             lora_dropout=finetuning_args.lora_dropout,
             target_modules=target_modules,
-            modules_to_save=finetuning_args.additional_target,
-            **config_kwargs
+            modules_to_save=finetuning_args.additional_target
         )
         model = get_peft_model(model, lora_config)
diff --git a/src/llmtuner/model/loader.py b/src/llmtuner/model/loader.py
index e3d6ebba..249f4734 100644
--- a/src/llmtuner/model/loader.py
+++ b/src/llmtuner/model/loader.py
@@ -151,25 +151,21 @@ def load_model_and_tokenizer(
         if is_deepspeed_zero3_enabled():
             raise ValueError("DeepSpeed ZeRO-3 is incompatible with quantization.")
 
-        if finetuning_args.loftq_init:
-            require_version("peft>=0.7.1.dev0", "To fix: pip install git+https://github.com/hiyouga/peft.git")
-            logger.info("Skip bnb quantization because using loftq.")
-        else:
-            if model_args.quantization_bit == 8:
-                require_version("bitsandbytes>=0.37.0", "To fix: pip install bitsandbytes>=0.37.0")
-                config_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_8bit=True)
+        if model_args.quantization_bit == 8:
+            require_version("bitsandbytes>=0.37.0", "To fix: pip install bitsandbytes>=0.37.0")
+            config_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_8bit=True)
 
-            if model_args.quantization_bit == 4:
-                require_version("bitsandbytes>=0.39.0", "To fix: pip install bitsandbytes>=0.39.0")
-                config_kwargs["quantization_config"] = BitsAndBytesConfig(
-                    load_in_4bit=True,
-                    bnb_4bit_compute_dtype=model_args.compute_dtype,
-                    bnb_4bit_use_double_quant=model_args.double_quantization,
-                    bnb_4bit_quant_type=model_args.quantization_type
-                )
+        if model_args.quantization_bit == 4:
+            require_version("bitsandbytes>=0.39.0", "To fix: pip install bitsandbytes>=0.39.0")
+            config_kwargs["quantization_config"] = BitsAndBytesConfig(
+                load_in_4bit=True,
+                bnb_4bit_compute_dtype=model_args.compute_dtype,
+                bnb_4bit_use_double_quant=model_args.double_quantization,
+                bnb_4bit_quant_type=model_args.quantization_type
+            )
 
-            config_kwargs["device_map"] = {"": get_current_device()}
-            logger.info("Quantizing model to {} bit.".format(model_args.quantization_bit))
+        config_kwargs["device_map"] = {"": get_current_device()}
+        logger.info("Quantizing model to {} bit.".format(model_args.quantization_bit))
 
     # Load pre-trained models (without valuehead)
     model = AutoModelForCausalLM.from_pretrained(
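
After this change, quantized LoRA fine-tuning follows the plain QLoRA path: loader.py builds a bitsandbytes quantization config and adapter.py attaches a standard LoRA adapter, with no LoftQ-specific kwargs passed to LoraConfig. The listing below is a minimal, self-contained sketch of that retained path using only public transformers/peft APIs; the model id and hyperparameter values are illustrative placeholders, not values taken from this patch.

    # Sketch of the retained QLoRA path (illustrative values; a CUDA GPU with
    # bitsandbytes installed is required for 4-bit loading).
    import torch
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig
    from peft import LoraConfig, TaskType, get_peft_model

    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,                      # quantization_bit == 4
        bnb_4bit_compute_dtype=torch.bfloat16,  # stands in for model_args.compute_dtype
        bnb_4bit_use_double_quant=True,         # stands in for model_args.double_quantization
        bnb_4bit_quant_type="nf4",              # stands in for model_args.quantization_type
    )

    model = AutoModelForCausalLM.from_pretrained(
        "meta-llama/Llama-2-7b-hf",             # placeholder model id
        quantization_config=quant_config,
        device_map={"": 0},                     # analogous to {"": get_current_device()}
    )

    lora_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        inference_mode=False,
        r=8,
        lora_alpha=16,
        lora_dropout=0.05,
        target_modules=["q_proj", "v_proj"],    # stands in for finetuning_args.lora_target
    )
    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()

With loftq_init removed, LoraConfig falls back to its default weight initialization, so the patched peft build mentioned in the deleted changelog entries is no longer required for this path.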