diff --git a/README.md b/README.md index 151d30b5..81097819 100644 --- a/README.md +++ b/README.md @@ -55,6 +55,8 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ ## Changelog +[24/02/05] Qwen1.5 (Qwen2 beta version) series models are supported in LLaMA-Factory. Check this [blog post](https://qwenlm.github.io/blog/qwen1.5/) for details. + [24/01/18] We supported **agent tuning** for most models, equipping model with tool using abilities by fine-tuning with `--dataset glaive_toolcall`. [23/12/23] We supported **[unsloth](https://github.com/unslothai/unsloth)**'s implementation to boost LoRA tuning for the LLaMA, Mistral and Yi models. Try `--use_unsloth` argument to activate unsloth patch. It achieves 1.7x speed in our benchmark, check [this page](https://github.com/hiyouga/LLaMA-Factory/wiki/Performance-comparison) for details. @@ -110,6 +112,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ | [Mixtral](https://huggingface.co/mistralai) | 8x7B | q_proj,v_proj | mistral | | [Phi-1.5/2](https://huggingface.co/microsoft) | 1.3B/2.7B | q_proj,v_proj | - | | [Qwen](https://huggingface.co/Qwen) | 1.8B/7B/14B/72B | c_attn | qwen | +| [Qwen1.5](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/72B | q_proj,v_proj | qwen | | [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | q_proj,v_proj | xverse | | [Yi](https://huggingface.co/01-ai) | 6B/34B | q_proj,v_proj | yi | | [Yuan](https://huggingface.co/IEITYuan) | 2B/51B/102B | q_proj,v_proj | yuan | diff --git a/README_zh.md b/README_zh.md index 39e8e12b..623d799b 100644 --- a/README_zh.md +++ b/README_zh.md @@ -55,6 +55,8 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/6ba60acc-e2e2-4bec-b846 ## 更新日志 +[24/02/05] Qwen1.5(Qwen2 测试版)系列模型已在 LLaMA-Factory 中实现微调支持。详情请查阅该[博客页面](https://qwenlm.github.io/zh/blog/qwen1.5/)。 + [24/01/18] 我们针对绝大多数模型实现了 **Agent 微调**,微调时指定 `--dataset glaive_toolcall` 即可使模型获得工具调用能力。 [23/12/23] 我们针对 LLaMA, Mistral 和 Yi 模型支持了 **[unsloth](https://github.com/unslothai/unsloth)** 的 LoRA 训练加速。请使用 `--use_unsloth` 参数启用 unsloth 优化。该方法可提供 1.7 倍的训练速度,详情请查阅[此页面](https://github.com/hiyouga/LLaMA-Factory/wiki/Performance-comparison)。 @@ -110,6 +112,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/6ba60acc-e2e2-4bec-b846 | [Mixtral](https://huggingface.co/mistralai) | 8x7B | q_proj,v_proj | mistral | | [Phi-1.5/2](https://huggingface.co/microsoft) | 1.3B/2.7B | q_proj,v_proj | - | | [Qwen](https://huggingface.co/Qwen) | 1.8B/7B/14B/72B | c_attn | qwen | +| [Qwen1.5](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/72B | q_proj,v_proj | qwen | | [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | q_proj,v_proj | xverse | | [Yi](https://huggingface.co/01-ai) | 6B/34B | q_proj,v_proj | yi | | [Yuan](https://huggingface.co/IEITYuan) | 2B/51B/102B | q_proj,v_proj | yuan | diff --git a/src/llmtuner/extras/constants.py b/src/llmtuner/extras/constants.py index a9818e58..73a90a84 100644 --- a/src/llmtuner/extras/constants.py +++ b/src/llmtuner/extras/constants.py @@ -246,6 +246,7 @@ register_model_group( DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-6.7b-base", DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-6.7b-base", }, + "DeepSeekCoder-7B-Base": {DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-7b-base-v1.5"}, "DeepSeekCoder-33B-Base": { DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-33b-base", DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-33b-base", @@ -254,6 +255,7 @@ register_model_group( DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-6.7b-instruct", DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-6.7b-instruct", }, + "DeepSeekCoder-7B-Chat": {DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-7b-instruct-v1.5"}, "DeepSeekCoder-33B-Chat": { DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-33b-instruct", DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-33b-instruct", @@ -555,6 +557,109 @@ register_model_group( ) +register_model_group( + models={ + "Qwen1.5-0.5B": { + DownloadSource.DEFAULT: "Qwen/Qwen1.5-0.5B", + DownloadSource.MODELSCOPE: "qwen/Qwen1.5-0.5B", + }, + "Qwen1.5-1.8B": { + DownloadSource.DEFAULT: "Qwen/Qwen1.5-1.8B", + DownloadSource.MODELSCOPE: "qwen/Qwen1.5-1.8B", + }, + "Qwen1.5-4B": { + DownloadSource.DEFAULT: "Qwen/Qwen1.5-4B", + DownloadSource.MODELSCOPE: "qwen/Qwen1.5-4B", + }, + "Qwen1.5-7B": { + DownloadSource.DEFAULT: "Qwen/Qwen1.5-7B", + DownloadSource.MODELSCOPE: "qwen/Qwen1.5-7B", + }, + "Qwen1.5-14B": { + DownloadSource.DEFAULT: "Qwen/Qwen1.5-14B", + DownloadSource.MODELSCOPE: "qwen/Qwen1.5-14B", + }, + "Qwen1.5-72B": { + DownloadSource.DEFAULT: "Qwen/Qwen1.5-72B", + DownloadSource.MODELSCOPE: "qwen/Qwen1.5-72B", + }, + "Qwen1.5-0.5B-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen1.5-0.5B-Chat", + DownloadSource.MODELSCOPE: "qwen/Qwen1.5-0.5B-Chat", + }, + "Qwen1.5-1.8B-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen1.5-1.8B-Chat", + DownloadSource.MODELSCOPE: "qwen/Qwen1.5-1.8B-Chat", + }, + "Qwen1.5-4B-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen1.5-4B-Chat", + DownloadSource.MODELSCOPE: "qwen/Qwen1.5-4B-Chat", + }, + "Qwen1.5-7B-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen1.5-7B-Chat", + DownloadSource.MODELSCOPE: "qwen/Qwen1.5-7B-Chat", + }, + "Qwen1.5-14B-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen1.5-14B-Chat", + DownloadSource.MODELSCOPE: "qwen/Qwen1.5-14B-Chat", + }, + "Qwen1.5-72B-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen1.5-72B-Chat", + DownloadSource.MODELSCOPE: "qwen/Qwen1.5-72B-Chat", + }, + "Qwen1.5-0.5B-int8-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int8", + DownloadSource.MODELSCOPE: "qwen/Qwen1.5-0.5B-Chat-GPTQ-Int8", + }, + "Qwen1.5-0.5B-int4-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int4", + DownloadSource.MODELSCOPE: "qwen/Qwen1.5-0.5B-Chat-GPTQ-Int4", + }, + "Qwen1.5-1.8B-int8-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen1.5-1.8B-Chat-GPTQ-Int8", + DownloadSource.MODELSCOPE: "qwen/Qwen1.5-1.8B-Chat-GPTQ-Int8", + }, + "Qwen1.5-1.8B-int4-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen1.5-1.8B-Chat-GPTQ-Int4", + DownloadSource.MODELSCOPE: "qwen/Qwen1.5-1.8B-Chat-GPTQ-Int4", + }, + "Qwen1.5-4B-int8-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen1.5-4B-Chat-GPTQ-Int8", + DownloadSource.MODELSCOPE: "qwen/Qwen1.5-4B-Chat-GPTQ-Int8", + }, + "Qwen1.5-4B-int4-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen1.5-4B-Chat-GPTQ-Int4", + DownloadSource.MODELSCOPE: "qwen/Qwen1.5-4B-Chat-GPTQ-Int4", + }, + "Qwen1.5-7B-int8-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen1.5-7B-Chat-GPTQ-Int8", + DownloadSource.MODELSCOPE: "qwen/Qwen1.5-7B-Chat-GPTQ-Int8", + }, + "Qwen1.5-7B-int4-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen1.5-7B-Chat-GPTQ-Int4", + DownloadSource.MODELSCOPE: "qwen/Qwen1.5-7B-Chat-GPTQ-Int4", + }, + "Qwen1.5-14B-int8-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen1.5-14B-Chat-GPTQ-Int8", + DownloadSource.MODELSCOPE: "qwen/Qwen1.5-14B-Chat-GPTQ-Int8", + }, + "Qwen1.5-14B-int4-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen1.5-14B-Chat-GPTQ-Int4", + DownloadSource.MODELSCOPE: "qwen/Qwen1.5-14B-Chat-GPTQ-Int4", + }, + "Qwen1.5-72B-int8-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen1.5-72B-Chat-GPTQ-Int8", + DownloadSource.MODELSCOPE: "qwen/Qwen1.5-72B-Chat-GPTQ-Int8", + }, + "Qwen1.5-72B-int4-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen1.5-72B-Chat-GPTQ-Int4", + DownloadSource.MODELSCOPE: "qwen/Qwen1.5-72B-Chat-GPTQ-Int4", + }, + }, + template="qwen", +) + + register_model_group( models={ "SOLAR-10.7B": {DownloadSource.DEFAULT: "upstage/SOLAR-10.7B-v1.0"},