support llama pro #2338, add rslora

Former-commit-id: 40d659b7f30dd5a004703c176ec1f22dc864e505
2026-06-26 09:08:53 +08:00 · 2024-02-15 02:27:36 +08:00
parent 3fa3e7542f
commit 562b9d0167
24 changed files with 438 additions and 203 deletions
--- a/tests/llama_pro.py
+++ b/tests/llama_pro.py
@@ -0,0 +1,108 @@
+# coding=utf-8
+# Performs block expansion for LLaMA, Mistral or Qwen1.5 models.
+# Usage: python llama_pro.py --model_name_or_path meta-llama/Llama-2-7b-hf --output_dir llama2_pro --num_expand 8
+# Inspired by: https://github.com/TencentARC/LLaMA-Pro/blob/main/scripts/block_expansion.py
+
+import json
+import os
+from collections import OrderedDict
+from typing import TYPE_CHECKING, Optional
+
+import fire
+import torch
+from safetensors.torch import save_file
+from tqdm import tqdm
+from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
+from transformers.modeling_utils import (
+    SAFE_WEIGHTS_INDEX_NAME,
+    SAFE_WEIGHTS_NAME,
+    WEIGHTS_INDEX_NAME,
+    WEIGHTS_NAME,
+    shard_checkpoint,
+)
+
+
+if TYPE_CHECKING:
+    from transformers import PretrainedConfig, PreTrainedModel
+
+
+def block_expansion(
+    model_name_or_path: str,
+    output_dir: str,
+    num_expand: int,
+    shard_size: Optional[str] = "2GB",
+    save_safetensors: Optional[bool] = False,
+):
+    """
+    Expands a causal LM by inserting extra decoder layers and saves the result to `output_dir`.
+
+    The `num_hidden_layers` layers are split into `num_expand` equal groups. Every original layer is
+    copied over, and after the last layer of each group one additional layer is inserted. The inserted
+    layer duplicates that layer's weights, except that tensors whose name contains `down_proj` or
+    `o_proj` are replaced by zeros (following the referenced LLaMA-Pro block expansion script).
+    The config (with the increased layer count), the tokenizer and the sharded weights are written
+    to `output_dir`.
+
+    Args:
+        model_name_or_path: identifier or path passed to the `from_pretrained` loaders.
+        output_dir: directory receiving the config, tokenizer, weight shards and index file.
+        num_expand: number of layers to insert, must be positive and divide the layer count.
+        shard_size: forwarded to `shard_checkpoint` as `max_shard_size`.
+        save_safetensors: write shards with `safetensors` instead of `torch.save`.
+
+    Raises:
+        ValueError: if `num_expand` is not positive or does not divide the number of layers.
+    """
+    config: "PretrainedConfig" = AutoConfig.from_pretrained(model_name_or_path)
+    num_layers = getattr(config, "num_hidden_layers")
+
+    # Validate before anything is written or the model is loaded, so that an invalid
+    # `num_expand` neither leaves a half-populated `output_dir` nor wastes a full model load.
+    if num_expand <= 0:
+        raise ValueError("`num_expand` {} should be a positive integer.".format(num_expand))
+
+    if num_layers % num_expand != 0:
+        raise ValueError("`num_layers` {} should be divisible by `num_expand` {}.".format(num_layers, num_expand))
+
+    setattr(config, "num_hidden_layers", num_layers + num_expand)
+    config.save_pretrained(output_dir)
+
+    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
+    tokenizer.save_pretrained(output_dir)
+
+    model: "PreTrainedModel" = AutoModelForCausalLM.from_pretrained(
+        model_name_or_path,
+        torch_dtype="auto",
+        trust_remote_code=True,
+        low_cpu_mem_usage=True,
+    )
+    state_dict = model.state_dict()
+
+    split = num_layers // num_expand
+    layer_cnt = 0
+    output_state_dict = OrderedDict()
+    for i in range(num_layers):
+        old_tag = ".{:d}.".format(i)
+        # Collect the tensors of layer `i` once; they are reused if this layer also gets expanded.
+        layer_items = [(key, value) for key, value in state_dict.items() if old_tag in key]
+
+        new_tag = ".{:d}.".format(layer_cnt)
+        for key, value in layer_items:
+            # Only the first match is the layer index; later matches belong to sub-module names.
+            output_state_dict[key.replace(old_tag, new_tag, 1)] = value
+
+        print("Add layer {} copied from layer {}".format(layer_cnt, i))
+        layer_cnt += 1
+        if (i + 1) % split == 0:
+            new_tag = ".{:d}.".format(layer_cnt)
+            for key, value in layer_items:
+                if "down_proj" in key or "o_proj" in key:
+                    new_value = torch.zeros_like(value)
+                else:
+                    # Clone so the inserted layer owns its storage: tensors sharing memory with the
+                    # source layer are rejected by `safetensors` when the shard is written.
+                    new_value = torch.clone(value)
+
+                output_state_dict[key.replace(old_tag, new_tag, 1)] = new_value
+
+            print("Add layer {} expanded from layer {}".format(layer_cnt, i))
+            layer_cnt += 1
+
+    # Everything that is not part of a numbered layer is carried over unchanged.
+    for key, value in state_dict.items():
+        if key not in output_state_dict:
+            output_state_dict[key] = value
+
+    weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
+    shards, index = shard_checkpoint(output_state_dict, max_shard_size=shard_size, weights_name=weights_name)
+
+    for shard_file, shard in tqdm(shards.items(), desc="Save weights"):
+        if save_safetensors:
+            save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"})
+        else:
+            torch.save(shard, os.path.join(output_dir, shard_file))
+
+    # `index` is None when all weights fit into a single shard, in which case no index file is needed.
+    if index is None:
+        print("Model weights saved in {}".format(os.path.join(output_dir, weights_name)))
+    else:
+        index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
+        with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
+            json.dump(index, f, indent=2, sort_keys=True)
+        print("Model weights saved in {}".format(output_dir))
+
+    print("Fine-tune this model with:")
+    print("  --model_name_or_path {} \\".format(output_dir))
+    print("  --finetuning_type freeze \\")
+    print("  --name_module_trainable all \\")
+    print("  --num_layer_trainable {} \\".format(num_expand))
+    print("  --use_llama_pro")
+
+
+if __name__ == "__main__":
+    fire.Fire(block_expansion)  # expose the arguments of `block_expansion` as command-line flags (see Usage)
--- a/tests/llamafy_baichuan2.py
+++ b/tests/llamafy_baichuan2.py
@@ -1,6 +1,6 @@
 # coding=utf-8
 # Converts the Baichuan2-7B model in the same format as LLaMA2-7B.
-# Usage: python llamafy_baichuan2.py --input_dir input --output_dir output --shard_size 10GB
+# Usage: python llamafy_baichuan2.py --input_dir input --output_dir output
 # Inspired by: https://huggingface.co/fireballoon/baichuan-llama-7b/blob/main/convert_baichuan_to_llama.py
 # Converted model: https://huggingface.co/hiyouga/Baichuan2-7B-Base-LLaMAfied

@@ -76,7 +76,9 @@ def save_config(input_dir: str, output_dir: str):
    print("Model config saved in {}".format(os.path.join(output_dir, CONFIG_NAME)))


-def llamafy_baichuan2(input_dir: str, output_dir: str, shard_size: str, save_safetensors: Optional[bool] = False):
+def llamafy_baichuan2(
+    input_dir: str, output_dir: str, shard_size: Optional[str] = "2GB", save_safetensors: Optional[bool] = False
+):
    try:
        os.makedirs(output_dir, exist_ok=False)
    except Exception as e:
--- a/tests/llamafy_internlm2.py
+++ b/tests/llamafy_internlm2.py
@@ -1,6 +1,6 @@
 # coding=utf-8
 # Converts the InternLM2 model in the same format as LLaMA2.
-# Usage: python llamafy_internlm2.py --input_dir input --output_dir output --shard_size 10GB
+# Usage: python llamafy_internlm2.py --input_dir input --output_dir output
 # Warning: We have found that the converted model cannot infer correctly. It will be fixed later.

 import json
@@ -98,7 +98,9 @@ def save_config(input_dir: str, output_dir: str):
    print("Model config saved in {}".format(os.path.join(output_dir, CONFIG_NAME)))


-def llamafy_internlm2(input_dir: str, output_dir: str, shard_size: str, save_safetensors: Optional[bool] = False):
+def llamafy_internlm2(
+    input_dir: str, output_dir: str, shard_size: Optional[str] = "2GB", save_safetensors: Optional[bool] = False
+):
    try:
        os.makedirs(output_dir, exist_ok=False)
    except Exception as e:
--- a/tests/llamafy_qwen.py
+++ b/tests/llamafy_qwen.py
@@ -1,6 +1,6 @@
 # coding=utf-8
 # Converts the Qwen models in the same format as LLaMA2.
-# Usage: python llamafy_qwen.py --input_dir input --output_dir output --shard_size 10GB
+# Usage: python llamafy_qwen.py --input_dir input --output_dir output
 # Converted model: https://huggingface.co/hiyouga/Qwen-14B-Chat-LLaMAfied

 import json
@@ -128,7 +128,9 @@ def save_config(input_dir: str, output_dir: str, torch_dtype: str):
    print("Model config saved in {}".format(os.path.join(output_dir, CONFIG_NAME)))


-def llamafy_qwen(input_dir: str, output_dir: str, shard_size: str, save_safetensors: Optional[bool] = False):
+def llamafy_qwen(
+    input_dir: str, output_dir: str, shard_size: Optional[str] = "2GB", save_safetensors: Optional[bool] = False
+):
    try:
        os.makedirs(output_dir, exist_ok=False)
    except Exception as e:
--- a/tests/loftq_init.py
+++ b/tests/loftq_init.py
@@ -26,7 +26,7 @@ class Shell(nn.Module):


 def unwrap_model(model: nn.Module, pattern=".base_layer") -> None:
-    for name in set([k.split(pattern)[0] for k, _ in model.named_modules() if pattern in k]):  # noqa: C403
+    for name in {k.split(pattern)[0] for k, _ in model.named_modules() if pattern in k}:
        parent_name = ".".join(name.split(".")[:-1])
        child_name = name.split(".")[-1]
        parent_module = model.get_submodule(parent_name)
--- a/tests/test_toolcall.py
+++ b/tests/test_toolcall.py
@@ -1,13 +1,10 @@
 import json
-import os
 from typing import Sequence

 from openai import OpenAI
 from transformers.utils.versions import require_version


-os.environ["OPENAI_BASE_URL"] = "http://192.168.0.1:8000/v1"
-os.environ["OPENAI_API_KEY"] = "0"
 require_version("openai>=1.5.0", "To fix: pip install openai>=1.5.0")


@@ -24,7 +21,10 @@ tool_map = {"calculate_gpa": calculate_gpa}


 if __name__ == "__main__":
-    client = OpenAI()
+    client = OpenAI(
+        api_key="0",
+        base_url="http://localhost:8000/v1",
+    )
    tools = [
        {
            "type": "function",