Support Qwen1.5 (also registers the DeepSeekCoder-7B v1.5 base and chat checkpoints)

Former-commit-id: ccabb5b04a
2026-01-02 12:10:34 +08:00 · 2024-02-06 00:10:51 +08:00
parent f45ed52161
commit dcfb9b5cfa
3 changed files with 111 additions and 0 deletions
--- a/README.md
+++ b/README.md
@@ -55,6 +55,8 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/

 ## Changelog

+[24/02/05] Qwen1.5 (Qwen2 beta version) series models are supported in LLaMA-Factory. Check this [blog post](https://qwenlm.github.io/blog/qwen1.5/) for details.
+
 [24/01/18] We supported **agent tuning** for most models, equipping model with tool using abilities by fine-tuning with `--dataset glaive_toolcall`.

 [23/12/23] We supported **[unsloth](https://github.com/unslothai/unsloth)**'s implementation to boost LoRA tuning for the LLaMA, Mistral and Yi models. Try `--use_unsloth` argument to activate unsloth patch. It achieves 1.7x speed in our benchmark, check [this page](https://github.com/hiyouga/LLaMA-Factory/wiki/Performance-comparison) for details.
@@ -110,6 +112,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 | [Mixtral](https://huggingface.co/mistralai)              | 8x7B                        | q_proj,v_proj     | mistral   |
 | [Phi-1.5/2](https://huggingface.co/microsoft)            | 1.3B/2.7B                   | q_proj,v_proj     | -         |
 | [Qwen](https://huggingface.co/Qwen)                      | 1.8B/7B/14B/72B             | c_attn            | qwen      |
+| [Qwen1.5](https://huggingface.co/Qwen)                   | 0.5B/1.8B/4B/7B/14B/72B     | q_proj,v_proj     | qwen      |
 | [XVERSE](https://huggingface.co/xverse)                  | 7B/13B/65B                  | q_proj,v_proj     | xverse    |
 | [Yi](https://huggingface.co/01-ai)                       | 6B/34B                      | q_proj,v_proj     | yi        |
 | [Yuan](https://huggingface.co/IEITYuan)                  | 2B/51B/102B                 | q_proj,v_proj     | yuan      |
--- a/README_zh.md
+++ b/README_zh.md
@@ -55,6 +55,8 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/6ba60acc-e2e2-4bec-b846

 ## 更新日志

+[24/02/05] Qwen1.5（Qwen2 测试版）系列模型已在 LLaMA-Factory 中实现微调支持。详情请查阅该[博客页面](https://qwenlm.github.io/zh/blog/qwen1.5/)。
+
 [24/01/18] 我们针对绝大多数模型实现了 **Agent 微调**，微调时指定 `--dataset glaive_toolcall` 即可使模型获得工具调用能力。

 [23/12/23] 我们针对 LLaMA, Mistral 和 Yi 模型支持了 **[unsloth](https://github.com/unslothai/unsloth)** 的 LoRA 训练加速。请使用 `--use_unsloth` 参数启用 unsloth 优化。该方法可提供 1.7 倍的训练速度，详情请查阅[此页面](https://github.com/hiyouga/LLaMA-Factory/wiki/Performance-comparison)。
@@ -110,6 +112,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/6ba60acc-e2e2-4bec-b846
 | [Mixtral](https://huggingface.co/mistralai)              | 8x7B                        | q_proj,v_proj     | mistral   |
 | [Phi-1.5/2](https://huggingface.co/microsoft)            | 1.3B/2.7B                   | q_proj,v_proj     | -         |
 | [Qwen](https://huggingface.co/Qwen)                      | 1.8B/7B/14B/72B             | c_attn            | qwen      |
+| [Qwen1.5](https://huggingface.co/Qwen)                   | 0.5B/1.8B/4B/7B/14B/72B     | q_proj,v_proj     | qwen      |
 | [XVERSE](https://huggingface.co/xverse)                  | 7B/13B/65B                  | q_proj,v_proj     | xverse    |
 | [Yi](https://huggingface.co/01-ai)                       | 6B/34B                      | q_proj,v_proj     | yi        |
 | [Yuan](https://huggingface.co/IEITYuan)                  | 2B/51B/102B                 | q_proj,v_proj     | yuan      |
--- a/src/llmtuner/extras/constants.py
+++ b/src/llmtuner/extras/constants.py
@@ -246,6 +246,7 @@ register_model_group(
            DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-6.7b-base",
            DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-6.7b-base",
        },
+        "DeepSeekCoder-7B-Base": {DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-7b-base-v1.5"},
        "DeepSeekCoder-33B-Base": {
            DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-33b-base",
            DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-33b-base",
@@ -254,6 +255,7 @@ register_model_group(
            DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-6.7b-instruct",
            DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-6.7b-instruct",
        },
+        "DeepSeekCoder-7B-Chat": {DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-7b-instruct-v1.5"},
        "DeepSeekCoder-33B-Chat": {
            DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-33b-instruct",
            DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-33b-instruct",
@@ -555,6 +557,109 @@ register_model_group(
 )


+register_model_group(
+    models={
+        "Qwen1.5-0.5B": {
+            DownloadSource.DEFAULT: "Qwen/Qwen1.5-0.5B",
+            DownloadSource.MODELSCOPE: "qwen/Qwen1.5-0.5B",
+        },
+        "Qwen1.5-1.8B": {
+            DownloadSource.DEFAULT: "Qwen/Qwen1.5-1.8B",
+            DownloadSource.MODELSCOPE: "qwen/Qwen1.5-1.8B",
+        },
+        "Qwen1.5-4B": {
+            DownloadSource.DEFAULT: "Qwen/Qwen1.5-4B",
+            DownloadSource.MODELSCOPE: "qwen/Qwen1.5-4B",
+        },
+        "Qwen1.5-7B": {
+            DownloadSource.DEFAULT: "Qwen/Qwen1.5-7B",
+            DownloadSource.MODELSCOPE: "qwen/Qwen1.5-7B",
+        },
+        "Qwen1.5-14B": {
+            DownloadSource.DEFAULT: "Qwen/Qwen1.5-14B",
+            DownloadSource.MODELSCOPE: "qwen/Qwen1.5-14B",
+        },
+        "Qwen1.5-72B": {
+            DownloadSource.DEFAULT: "Qwen/Qwen1.5-72B",
+            DownloadSource.MODELSCOPE: "qwen/Qwen1.5-72B",
+        },
+        "Qwen1.5-0.5B-Chat": {
+            DownloadSource.DEFAULT: "Qwen/Qwen1.5-0.5B-Chat",
+            DownloadSource.MODELSCOPE: "qwen/Qwen1.5-0.5B-Chat",
+        },
+        "Qwen1.5-1.8B-Chat": {
+            DownloadSource.DEFAULT: "Qwen/Qwen1.5-1.8B-Chat",
+            DownloadSource.MODELSCOPE: "qwen/Qwen1.5-1.8B-Chat",
+        },
+        "Qwen1.5-4B-Chat": {
+            DownloadSource.DEFAULT: "Qwen/Qwen1.5-4B-Chat",
+            DownloadSource.MODELSCOPE: "qwen/Qwen1.5-4B-Chat",
+        },
+        "Qwen1.5-7B-Chat": {
+            DownloadSource.DEFAULT: "Qwen/Qwen1.5-7B-Chat",
+            DownloadSource.MODELSCOPE: "qwen/Qwen1.5-7B-Chat",
+        },
+        "Qwen1.5-14B-Chat": {
+            DownloadSource.DEFAULT: "Qwen/Qwen1.5-14B-Chat",
+            DownloadSource.MODELSCOPE: "qwen/Qwen1.5-14B-Chat",
+        },
+        "Qwen1.5-72B-Chat": {
+            DownloadSource.DEFAULT: "Qwen/Qwen1.5-72B-Chat",
+            DownloadSource.MODELSCOPE: "qwen/Qwen1.5-72B-Chat",
+        },
+        "Qwen1.5-0.5B-int8-Chat": {
+            DownloadSource.DEFAULT: "Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int8",
+            DownloadSource.MODELSCOPE: "qwen/Qwen1.5-0.5B-Chat-GPTQ-Int8",
+        },
+        "Qwen1.5-0.5B-int4-Chat": {
+            DownloadSource.DEFAULT: "Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int4",
+            DownloadSource.MODELSCOPE: "qwen/Qwen1.5-0.5B-Chat-GPTQ-Int4",
+        },
+        "Qwen1.5-1.8B-int8-Chat": {
+            DownloadSource.DEFAULT: "Qwen/Qwen1.5-1.8B-Chat-GPTQ-Int8",
+            DownloadSource.MODELSCOPE: "qwen/Qwen1.5-1.8B-Chat-GPTQ-Int8",
+        },
+        "Qwen1.5-1.8B-int4-Chat": {
+            DownloadSource.DEFAULT: "Qwen/Qwen1.5-1.8B-Chat-GPTQ-Int4",
+            DownloadSource.MODELSCOPE: "qwen/Qwen1.5-1.8B-Chat-GPTQ-Int4",
+        },
+        "Qwen1.5-4B-int8-Chat": {
+            DownloadSource.DEFAULT: "Qwen/Qwen1.5-4B-Chat-GPTQ-Int8",
+            DownloadSource.MODELSCOPE: "qwen/Qwen1.5-4B-Chat-GPTQ-Int8",
+        },
+        "Qwen1.5-4B-int4-Chat": {
+            DownloadSource.DEFAULT: "Qwen/Qwen1.5-4B-Chat-GPTQ-Int4",
+            DownloadSource.MODELSCOPE: "qwen/Qwen1.5-4B-Chat-GPTQ-Int4",
+        },
+        "Qwen1.5-7B-int8-Chat": {
+            DownloadSource.DEFAULT: "Qwen/Qwen1.5-7B-Chat-GPTQ-Int8",
+            DownloadSource.MODELSCOPE: "qwen/Qwen1.5-7B-Chat-GPTQ-Int8",
+        },
+        "Qwen1.5-7B-int4-Chat": {
+            DownloadSource.DEFAULT: "Qwen/Qwen1.5-7B-Chat-GPTQ-Int4",
+            DownloadSource.MODELSCOPE: "qwen/Qwen1.5-7B-Chat-GPTQ-Int4",
+        },
+        "Qwen1.5-14B-int8-Chat": {
+            DownloadSource.DEFAULT: "Qwen/Qwen1.5-14B-Chat-GPTQ-Int8",
+            DownloadSource.MODELSCOPE: "qwen/Qwen1.5-14B-Chat-GPTQ-Int8",
+        },
+        "Qwen1.5-14B-int4-Chat": {
+            DownloadSource.DEFAULT: "Qwen/Qwen1.5-14B-Chat-GPTQ-Int4",
+            DownloadSource.MODELSCOPE: "qwen/Qwen1.5-14B-Chat-GPTQ-Int4",
+        },
+        "Qwen1.5-72B-int8-Chat": {
+            DownloadSource.DEFAULT: "Qwen/Qwen1.5-72B-Chat-GPTQ-Int8",
+            DownloadSource.MODELSCOPE: "qwen/Qwen1.5-72B-Chat-GPTQ-Int8",
+        },
+        "Qwen1.5-72B-int4-Chat": {
+            DownloadSource.DEFAULT: "Qwen/Qwen1.5-72B-Chat-GPTQ-Int4",
+            DownloadSource.MODELSCOPE: "qwen/Qwen1.5-72B-Chat-GPTQ-Int4",
+        },
+    },
+    template="qwen",
+)
+
+
 register_model_group(
    models={
        "SOLAR-10.7B": {DownloadSource.DEFAULT: "upstage/SOLAR-10.7B-v1.0"},