From 15b321da8e3847922a0f7276304efae7c8fbf6c5 Mon Sep 17 00:00:00 2001
From: hiyouga
Date: Wed, 13 Dec 2023 01:53:46 +0800
Subject: [PATCH] remove loftq

Former-commit-id: 3a8a50d4d42082b3bdce549653b398e49f2eb554
---
 README.md                               |  2 --
 README_zh.md                            |  2 --
 src/llmtuner/hparams/finetuning_args.py |  4 ----
 src/llmtuner/model/adapter.py           | 13 +----------
 src/llmtuner/model/loader.py            | 30 +++++++++++--------------
 5 files changed, 14 insertions(+), 37 deletions(-)

diff --git a/README.md b/README.md
index bb23ba36..2850ac2a 100644
--- a/README.md
+++ b/README.md
@@ -55,8 +55,6 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 
 ## Changelog
 
-[23/12/12] We supported **[LoftQ](https://arxiv.org/abs/2310.08659)** training. Try `loftq_init` argument with [a patched peft](https://github.com/hiyouga/peft) for better QLoRA performance.
-
 [23/12/12] We supported fine-tuning the latest MoE model **[Mixtral 8x7B](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1)** in our framework. See hardware requirement [here](#hardware-requirement).
 
 [23/12/01] We supported downloading pre-trained models and datasets from the **[ModelScope Hub](https://modelscope.cn/models)** for Chinese mainland users. See [this tutorial](#use-modelscope-hub-optional) for usage.
diff --git a/README_zh.md b/README_zh.md
index ae5d5106..87c9bab8 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -55,8 +55,6 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/6ba60acc-e2e2-4bec-b846
 
 ## 更新日志
 
-[23/12/12] 我们支持了 **[LoftQ](https://arxiv.org/abs/2310.08659)** 训练。请使用 `loftq_init` 参数以及[修复后的 peft](https://github.com/hiyouga/peft) 以提升 QLoRA 性能。
-
 [23/12/12] 我们支持了微调最新的混合专家模型 **[Mixtral 8x7B](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1)**。硬件需求请查阅[此处](#硬件依赖)。
 
 [23/12/01] 我们支持了从 **[魔搭社区](https://modelscope.cn/models)** 下载预训练模型和数据集。详细用法请参照 [此教程](#使用魔搭社区可跳过)。
diff --git a/src/llmtuner/hparams/finetuning_args.py b/src/llmtuner/hparams/finetuning_args.py
index 87f8f8a4..ae3a6f79 100644
--- a/src/llmtuner/hparams/finetuning_args.py
+++ b/src/llmtuner/hparams/finetuning_args.py
@@ -55,10 +55,6 @@ class LoraArguments:
                   Phi-1.5 choices: [\"Wqkv\", \"out_proj\", \"fc1\", \"fc2\"], \
                   Others choices: the same as LLaMA."}
     )
-    loftq_init: Optional[bool] = field(
-        default=False,
-        metadata={"help": "Use LoftQ initialization for quantized LoRA fine-tuning."}
-    )
     resume_lora_training: Optional[bool] = field(
         default=True,
         metadata={"help": "Whether to resume training from the last LoRA weights or create new weights after merging them."}
diff --git a/src/llmtuner/model/adapter.py b/src/llmtuner/model/adapter.py
index 9404e707..72cea444 100644
--- a/src/llmtuner/model/adapter.py
+++ b/src/llmtuner/model/adapter.py
@@ -91,16 +91,6 @@ def init_adapter(
         else:
             target_modules = finetuning_args.lora_target
 
-        config_kwargs = {}
-        if model_args.quantization_bit is not None and finetuning_args.loftq_init:
-            if model_args.quantization_bit != 4:
-                raise ValueError("LoftQ initialization only support 4-bit quantized training.")
-
-            from peft import LoftQConfig # type: ignore
-            loftq_config = LoftQConfig(loftq_bits=4)
-            config_kwargs["init_lora_weights"] = "loftq"
-            config_kwargs["loftq_config"] = loftq_config
-
         lora_config = LoraConfig(
             task_type=TaskType.CAUSAL_LM,
             inference_mode=False,
@@ -108,8 +98,7 @@ def init_adapter(
             r=finetuning_args.lora_rank,
             lora_alpha=finetuning_args.lora_alpha,
             lora_dropout=finetuning_args.lora_dropout,
             target_modules=target_modules,
-            modules_to_save=finetuning_args.additional_target,
-            **config_kwargs
+            modules_to_save=finetuning_args.additional_target
         )
         model = get_peft_model(model, lora_config)
diff --git a/src/llmtuner/model/loader.py b/src/llmtuner/model/loader.py
index e3d6ebba..249f4734 100644
--- a/src/llmtuner/model/loader.py
+++ b/src/llmtuner/model/loader.py
@@ -151,25 +151,21 @@ def load_model_and_tokenizer(
         if is_deepspeed_zero3_enabled():
             raise ValueError("DeepSpeed ZeRO-3 is incompatible with quantization.")
 
-        if finetuning_args.loftq_init:
-            require_version("peft>=0.7.1.dev0", "To fix: pip install git+https://github.com/hiyouga/peft.git")
-            logger.info("Skip bnb quantization because using loftq.")
-        else:
-            if model_args.quantization_bit == 8:
-                require_version("bitsandbytes>=0.37.0", "To fix: pip install bitsandbytes>=0.37.0")
-                config_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_8bit=True)
+        if model_args.quantization_bit == 8:
+            require_version("bitsandbytes>=0.37.0", "To fix: pip install bitsandbytes>=0.37.0")
+            config_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_8bit=True)
 
-            if model_args.quantization_bit == 4:
-                require_version("bitsandbytes>=0.39.0", "To fix: pip install bitsandbytes>=0.39.0")
-                config_kwargs["quantization_config"] = BitsAndBytesConfig(
-                    load_in_4bit=True,
-                    bnb_4bit_compute_dtype=model_args.compute_dtype,
-                    bnb_4bit_use_double_quant=model_args.double_quantization,
-                    bnb_4bit_quant_type=model_args.quantization_type
-                )
+        if model_args.quantization_bit == 4:
+            require_version("bitsandbytes>=0.39.0", "To fix: pip install bitsandbytes>=0.39.0")
+            config_kwargs["quantization_config"] = BitsAndBytesConfig(
+                load_in_4bit=True,
+                bnb_4bit_compute_dtype=model_args.compute_dtype,
+                bnb_4bit_use_double_quant=model_args.double_quantization,
+                bnb_4bit_quant_type=model_args.quantization_type
+            )
 
-            config_kwargs["device_map"] = {"": get_current_device()}
-            logger.info("Quantizing model to {} bit.".format(model_args.quantization_bit))
+        config_kwargs["device_map"] = {"": get_current_device()}
+        logger.info("Quantizing model to {} bit.".format(model_args.quantization_bit))
 
     # Load pre-trained models (without valuehead)
     model = AutoModelForCausalLM.from_pretrained(
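
After this change, quantized LoRA fine-tuning follows the plain QLoRA path: loader.py builds a bitsandbytes quantization config and adapter.py attaches a standard LoRA adapter, with no LoftQ-specific kwargs passed to LoraConfig. The listing below is a minimal, self-contained sketch of that retained path using only public transformers/peft APIs; the model id and hyperparameter values are illustrative placeholders, not values taken from this patch.

    # Sketch of the retained QLoRA path (illustrative values; a CUDA GPU with
    # bitsandbytes installed is required for 4-bit loading).
    import torch
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig
    from peft import LoraConfig, TaskType, get_peft_model

    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,                      # quantization_bit == 4
        bnb_4bit_compute_dtype=torch.bfloat16,  # stands in for model_args.compute_dtype
        bnb_4bit_use_double_quant=True,         # stands in for model_args.double_quantization
        bnb_4bit_quant_type="nf4",              # stands in for model_args.quantization_type
    )

    model = AutoModelForCausalLM.from_pretrained(
        "meta-llama/Llama-2-7b-hf",             # placeholder model id
        quantization_config=quant_config,
        device_map={"": 0},                     # analogous to {"": get_current_device()}
    )

    lora_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        inference_mode=False,
        r=8,
        lora_alpha=16,
        lora_dropout=0.05,
        target_modules=["q_proj", "v_proj"],    # stands in for finetuning_args.lora_target
    )
    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()

With loftq_init removed, LoraConfig falls back to its default weight initialization, so the patched peft build mentioned in the deleted changelog entries is no longer required for this path.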