Mirror of https://github.com/hiyouga/LLaMA-Factory.git (synced 2025-12-14 10:56:56 +08:00)
scripts/loftq_init.py
@@ -1,7 +1,7 @@
 # coding=utf-8
 # Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
 #
-# This code is inspired by HuggingFace's PEFT library.
+# This code is based on HuggingFace's PEFT library.
 # https://github.com/huggingface/peft/blob/v0.10.0/examples/loftq_finetuning/quantize_save_load.py
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -17,11 +17,9 @@
 # limitations under the License.
 
 import os
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING
 
 import fire
-import torch
-import torch.nn as nn
 from peft import LoftQConfig, LoraConfig, TaskType, get_peft_model
 from transformers import AutoModelForCausalLM, AutoTokenizer
@@ -30,41 +28,20 @@ if TYPE_CHECKING:
     from transformers import PreTrainedModel
 
 
-class Shell(nn.Module):
-    def __init__(self, weight: torch.Tensor, bias: Optional[torch.Tensor] = None):
-        super().__init__()
-        self.weight = nn.Parameter(weight, requires_grad=False)
-        if bias is not None:
-            self.bias = nn.Parameter(bias, requires_grad=False)
-
-
-def unwrap_model(model: nn.Module, pattern=".base_layer") -> None:
-    for name in {k.split(pattern)[0] for k, _ in model.named_modules() if pattern in k}:
-        parent_name = ".".join(name.split(".")[:-1])
-        child_name = name.split(".")[-1]
-        parent_module = model.get_submodule(parent_name)
-        child_module = getattr(parent_module, child_name)
-        base_layer = getattr(child_module, "base_layer")
-        weight = getattr(base_layer, "weight", None)
-        bias = getattr(base_layer, "bias", None)
-        setattr(parent_module, child_name, Shell(weight, bias))
-
-    print("Model unwrapped.")
-
-
 def quantize_loftq(
     model_name_or_path: str,
-    save_dir: str,
-    loftq_bits: Optional[int] = 4,
-    loftq_iter: Optional[int] = 1,
-    lora_alpha: Optional[int] = None,
-    lora_rank: Optional[int] = 16,
-    lora_target: Optional[str] = "q_proj,v_proj",
-    save_safetensors: Optional[bool] = False,
+    output_dir: str,
+    loftq_bits: int = 4,
+    loftq_iter: int = 4,
+    lora_alpha: int = None,
+    lora_rank: int = 16,
+    lora_dropout: float = 0,
+    lora_target: str = "q_proj,v_proj",
+    save_safetensors: bool = True,
 ):
     r"""
     Initializes LoRA weights with LoRA-fine-tuning-aware Quantization (LoftQ)
-    Usage: python loftq_init.py --model_name_or_path path_to_model --save_dir output_dir
+    Usage: python loftq_init.py --model_name_or_path path_to_model --output_dir output_dir
     """
     tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
     model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype="auto")
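
Note on the revised signature above: `save_dir` becomes `output_dir`, a `lora_dropout` knob is exposed, and safetensors serialization is now the default. A minimal invocation sketch follows; the model id and output path are placeholders, not values from this patch:

```python
# Hypothetical direct call to the revised entry point; equivalent to the
# documented CLI usage. Model id and output path are placeholders.
from loftq_init import quantize_loftq  # assumes scripts/ is importable

quantize_loftq(
    model_name_or_path="meta-llama/Llama-2-7b-hf",  # any causal LM checkpoint
    output_dir="output/llama2-7b-loftq",
    loftq_bits=4,   # bit width used during LoftQ initialization
    loftq_iter=4,   # alternating quantization/SVD steps (new default)
    lora_rank=16,   # lora_alpha falls back to 2 * lora_rank when unset
)
```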
@@ -74,25 +51,34 @@ def quantize_loftq(
         inference_mode=True,
         r=lora_rank,
         lora_alpha=lora_alpha if lora_alpha is not None else lora_rank * 2,
-        lora_dropout=0.1,
+        lora_dropout=lora_dropout,
         target_modules=[name.strip() for name in lora_target.split(",")],
         init_lora_weights="loftq",
         loftq_config=loftq_config,
     )
 
     # Init LoftQ model
-    lora_model = get_peft_model(model, lora_config)
-    base_model: "PreTrainedModel" = lora_model.get_base_model()
+    print("Initializing LoftQ weights, it may take several minutes, wait patiently.")
+    peft_model = get_peft_model(model, lora_config)
+    loftq_dir = os.path.join(output_dir, "loftq_init")
 
     # Save LoftQ model
-    setattr(lora_model.base_model.peft_config["default"], "base_model_name_or_path", save_dir)
-    setattr(lora_model.base_model.peft_config["default"], "init_lora_weights", True)
-    lora_model.save_pretrained(os.path.join(save_dir, "adapters"), safe_serialization=save_safetensors)
+    setattr(peft_model.peft_config["default"], "base_model_name_or_path", output_dir)
+    setattr(peft_model.peft_config["default"], "init_lora_weights", True)  # don't apply loftq again
+    peft_model.save_pretrained(loftq_dir, safe_serialization=save_safetensors)
+    print("Adapter weights saved in {}".format(loftq_dir))
 
     # Save base model
-    unwrap_model(base_model)
-    base_model.save_pretrained(save_dir, safe_serialization=save_safetensors)
-    tokenizer.save_pretrained(save_dir)
+    base_model: "PreTrainedModel" = peft_model.unload()
+    base_model.save_pretrained(output_dir, safe_serialization=save_safetensors)
+    tokenizer.save_pretrained(output_dir)
+    print("Model weights saved in {}".format(output_dir))
+
+    print("Fine-tune this model with:")
+    print("model_name_or_path: {}".format(output_dir))
+    print("adapter_name_or_path: {}".format(loftq_dir))
+    print("finetuning_type: lora")
+    print("quantization_bit: {}".format(loftq_bits))
 
 
 if __name__ == "__main__":
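
With `peft_model.unload()` replacing the hand-rolled `Shell`/`unwrap_model` pair, the script now writes a plain de-quantized base model to `output_dir` and the adapter to `output_dir/loftq_init`. A sketch of how these artifacts pair up outside LLaMA-Factory, reusing the assumed paths from the example above:

```python
# Sketch: reload the saved base model plus the LoftQ-initialized adapter.
# Paths are the assumed placeholders from the invocation example above.
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base = AutoModelForCausalLM.from_pretrained("output/llama2-7b-loftq", torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained("output/llama2-7b-loftq")
# is_trainable=True keeps the adapter weights updatable for fine-tuning
model = PeftModel.from_pretrained(base, "output/llama2-7b-loftq/loftq_init", is_trainable=True)
```

Inside LLaMA-Factory itself, the printed hints map one-to-one onto the training config (`model_name_or_path`, `adapter_name_or_path`, `finetuning_type`, `quantization_bit`).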
scripts/pissa_init.py (new file, 79 lines)
@@ -0,0 +1,79 @@
+# coding=utf-8
+# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
+#
+# This code is based on HuggingFace's PEFT library.
+# https://github.com/huggingface/peft/blob/v0.11.0/examples/pissa_finetuning/preprocess.py
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from typing import TYPE_CHECKING
+
+import fire
+from peft import LoraConfig, TaskType, get_peft_model
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+
+if TYPE_CHECKING:
+    from transformers import PreTrainedModel
+
+
+def quantize_pissa(
+    model_name_or_path: str,
+    output_dir: str,
+    pissa_iter: int = 4,
+    lora_alpha: int = None,
+    lora_rank: int = 16,
+    lora_dropout: float = 0,
+    lora_target: str = "q_proj,v_proj",
+    save_safetensors: bool = True,
+):
+    r"""
+    Initializes LoRA weights with Principal Singular values and Singular vectors Adaptation (PiSSA)
+    Usage: python pissa_init.py --model_name_or_path path_to_model --output_dir output_dir
+    """
+    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
+    model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype="auto")
+    lora_config = LoraConfig(
+        task_type=TaskType.CAUSAL_LM,
+        r=lora_rank,
+        lora_alpha=lora_alpha if lora_alpha is not None else lora_rank * 2,
+        lora_dropout=lora_dropout,
+        target_modules=[name.strip() for name in lora_target.split(",")],
+        init_lora_weights="pissa" if pissa_iter == -1 else "pissa_niter_{}".format(pissa_iter),
+    )
+
+    # Init PiSSA model
+    peft_model = get_peft_model(model, lora_config)
+    pissa_dir = os.path.join(output_dir, "pissa_init")
+
+    # Save PiSSA model
+    setattr(peft_model.peft_config["default"], "init_lora_weights", True)  # don't apply pissa again
+    peft_model.save_pretrained(pissa_dir, safe_serialization=save_safetensors)
+    print("Adapter weights saved in {}".format(pissa_dir))
+
+    # Save base model
+    base_model: "PreTrainedModel" = peft_model.unload()
+    base_model.save_pretrained(output_dir, safe_serialization=save_safetensors)
+    tokenizer.save_pretrained(output_dir)
+    print("Model weights saved in {}".format(output_dir))
+
+    print("Fine-tune this model with:")
+    print("model_name_or_path: {}".format(output_dir))
+    print("adapter_name_or_path: {}".format(pissa_dir))
+    print("finetuning_type: lora")
+    print("pissa_convert: true")
+
+
+if __name__ == "__main__":
+    fire.Fire(quantize_pissa)
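
PEFT's `init_lora_weights="pissa"` (or `"pissa_niter_{n}"`, which approximates the SVD with n subspace iterations) seeds the adapter from each target weight's top singular directions and leaves the residual in the frozen base. A toy illustration of that decomposition, independent of PEFT:

```python
# Toy sketch of the PiSSA decomposition idea (not the PEFT implementation).
import torch

torch.manual_seed(0)
W = torch.randn(256, 256)  # stand-in for a q_proj/v_proj weight matrix
r = 16                     # adapter rank

U, S, Vh = torch.linalg.svd(W, full_matrices=False)
B = U[:, :r] * S[:r].sqrt()                  # (out, r): principal left factor
A = S[:r].sqrt().unsqueeze(1) * Vh[:r, :]    # (r, in): principal right factor
W_res = W - B @ A                            # residual, kept as the frozen base

# The frozen residual plus the trainable low-rank pair reconstructs W exactly.
assert torch.allclose(W_res + B @ A, W, atol=1e-5)
```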