diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 1f469955b..89cdd2234 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -25,16 +25,16 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - + - name: Set up Python uses: actions/setup-python@v5 with: python-version: '3.10' - + - name: Install dependencies run: | pip install -r docs/requirements.txt - + - name: Build Sphinx run: | sphinx-build -b html docs/zh docs/_build/html/zh @@ -56,10 +56,10 @@ jobs: > docs/_build/html/index.html touch docs/_build/html/.nojekyll - + - name: Setup Pages uses: actions/configure-pages@v5 - + - name: Upload artifact uses: actions/upload-pages-artifact@v3 with: diff --git a/README.md b/README.md index 9032f81fd..57e152405 100644 --- a/README.md +++ b/README.md @@ -291,7 +291,7 @@ Read technical notes: | [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - | | [GPT-OSS](https://huggingface.co/openai) | 20B/120B | gpt_oss | | [Granite 3-4](https://huggingface.co/ibm-granite) | 1B/2B/3B/7B/8B | granite3/granite4 | -| [Hunyuan/Hunyuan1.5 (MT)](https://huggingface.co/tencent/) | 0.5B/1.8B/4B/7B/13B | hunyuan/hunyuan_small | +| [Hunyuan/Hunyuan1.5 (MT)](https://huggingface.co/tencent/) | 0.5B/1.8B/4B/7B/13B | hunyuan/hunyuan_small| | [InternLM 2-3](https://huggingface.co/internlm) | 7B/8B/20B | intern2 | | [InternVL 2.5-3.5](https://huggingface.co/OpenGVLab) | 1B/2B/4B/8B/14B/30B/38B/78B/241B | intern_vl | | [Intern-S1-mini](https://huggingface.co/internlm/) | 8B | intern_s1 | @@ -319,6 +319,7 @@ Read technical notes: | [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral | | [Qwen2 (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen | | [Qwen3 (MoE/Instruct/Thinking/Next)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/80B/235B | qwen3/qwen3_nothink | +| [Qwen3.5](https://huggingface.co/Qwen) | 27B/35B/122B/397B | qwen3_5 | | [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio | | [Qwen2.5-Omni](https://huggingface.co/Qwen) | 3B/7B | qwen2_omni | | [Qwen3-Omni](https://huggingface.co/Qwen) | 30B | qwen3_omni | diff --git a/README_zh.md b/README_zh.md index 81331e3b1..ba2a1c546 100644 --- a/README_zh.md +++ b/README_zh.md @@ -293,7 +293,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc | [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - | | [GPT-OSS](https://huggingface.co/openai) | 20B/120B | gpt_oss | | [Granite 3-4](https://huggingface.co/ibm-granite) | 1B/2B/3B/7B/8B | granite3/granite4 | -| [Hunyuan/Hunyuan1.5 (MT)](https://huggingface.co/tencent/) | 0.5B/1.8B/4B/7B/13B | hunyuan/hunyuan_small | +| [Hunyuan/Hunyuan1.5 (MT)](https://huggingface.co/tencent/) | 0.5B/1.8B/4B/7B/13B | hunyuan/hunyuan_small| | [InternLM 2-3](https://huggingface.co/internlm) | 7B/8B/20B | intern2 | | [InternVL 2.5-3.5](https://huggingface.co/OpenGVLab) | 1B/2B/4B/8B/14B/30B/38B/78B/241B | intern_vl | | [Intern-S1-mini](https://huggingface.co/internlm/) | 8B | intern_s1 | @@ -321,6 +321,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc | [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral | | [Qwen2 (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen | | [Qwen3 (MoE/Instruct/Thinking/Next)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/80B/235B | qwen3/qwen3_nothink | +| [Qwen3.5](https://huggingface.co/Qwen) | 27B/35B/122B/397B | qwen3_5 | | [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio | | [Qwen2.5-Omni](https://huggingface.co/Qwen) | 3B/7B | qwen2_omni | | [Qwen3-Omni](https://huggingface.co/Qwen) | 30B | qwen3_omni | diff --git a/docs/_static/css/lang-switcher.css b/docs/_static/css/lang-switcher.css index f70f73d51..aaf2df88f 100644 --- a/docs/_static/css/lang-switcher.css +++ b/docs/_static/css/lang-switcher.css @@ -47,4 +47,3 @@ border-color: rgba(255, 255, 255, 0.45); box-shadow: 0 0 0 3px rgba(255, 255, 255, 0.12); } - diff --git a/docs/conf.py b/docs/conf.py index 1f10ca30a..46496019a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,33 +1,31 @@ # Configuration file for the Sphinx documentation builder. -import os -import sys # Define common settings here -project = 'LlamaFactory' -copyright = '2024, LlamaFactory Team' -author = 'LlamaFactory Team' +project = "LlamaFactory" +copyright = "2024, LlamaFactory Team" +author = "LlamaFactory Team" extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.viewcode', - 'sphinx.ext.napoleon', - 'myst_parser', + "sphinx.ext.autodoc", + "sphinx.ext.viewcode", + "sphinx.ext.napoleon", + "myst_parser", ] -templates_path = ['_templates'] -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +templates_path = ["_templates"] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] -html_theme = 'sphinx_rtd_theme' +html_theme = "sphinx_rtd_theme" -html_static_path = ['_static'] +html_static_path = ["_static"] html_js_files = [ - 'js/switcher.js', + "js/switcher.js", ] html_css_files = [ - 'css/lang-switcher.css', + "css/lang-switcher.css", ] myst_enable_extensions = [ diff --git a/docs/en/conf.py b/docs/en/conf.py index 193c8a4c6..b53f1840a 100644 --- a/docs/en/conf.py +++ b/docs/en/conf.py @@ -1,20 +1,22 @@ import os import sys -# Add parent dir to path to allow importing conf.py -sys.path.insert(0, os.path.abspath('..')) -from conf import * +# Add parent dir to path to allow importing conf.py +sys.path.insert(0, os.path.abspath("..")) + +from conf import * # noqa: F403 + # Language settings -language = 'en' -html_search_language = 'en' +language = "en" +html_search_language = "en" # Static files # Point to the root _static directory -html_static_path = ['../_static'] +html_static_path = ["../_static"] # Add custom JS for language switcher html_js_files = [ - 'js/switcher.js', + "js/switcher.js", ] diff --git a/docs/zh/conf.py b/docs/zh/conf.py index 5d97ec821..4af1c7be5 100644 --- a/docs/zh/conf.py +++ b/docs/zh/conf.py @@ -1,20 +1,22 @@ import os import sys -# Add parent dir to path to allow importing conf.py -sys.path.insert(0, os.path.abspath('..')) -from conf import * +# Add parent dir to path to allow importing conf.py +sys.path.insert(0, os.path.abspath("..")) + +from conf import * # noqa: F403 + # Language settings -language = 'zh_CN' -html_search_language = 'zh' +language = "zh_CN" +html_search_language = "zh" # Static files # Point to the root _static directory -html_static_path = ['../_static'] +html_static_path = ["../_static"] # Add custom JS for language switcher html_js_files = [ - 'js/switcher.js', + "js/switcher.js", ] diff --git a/examples/v1/train_full/train_full_deepspeed.yaml b/examples/v1/train_full/train_full_deepspeed.yaml index 29d9353cd..2b9a6642e 100644 --- a/examples/v1/train_full/train_full_deepspeed.yaml +++ b/examples/v1/train_full/train_full_deepspeed.yaml @@ -6,14 +6,14 @@ template: qwen3_nothink kernel_config: name: auto - include_kernels: auto + include_kernels: auto dist_config: name: deepspeed config_file: examples/deepspeed/ds_z3_config.json ### data -train_dataset: data/v1_sft_demo.yaml +train_dataset: data/v1_sft_demo.yaml ### training output_dir: outputs/Qwen3-0.6B-deepspeed @@ -22,4 +22,3 @@ cutoff_len: 2048 learning_rate: 1.0e-4 bf16: true max_steps: 10 - diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index a909032c9..581105a60 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -2810,6 +2810,29 @@ register_model_group( ) +register_model_group( + models={ + "Qwen3.5-27B": { + DownloadSource.DEFAULT: "Qwen/Qwen3.5-27B", + DownloadSource.MODELSCOPE: "Qwen/Qwen3.5-27B", + }, + "Qwen3.5-35B-A3B": { + DownloadSource.DEFAULT: "Qwen/Qwen3.5-35B-A3B", + DownloadSource.MODELSCOPE: "Qwen/Qwen3.5-35B-A3B", + }, + "Qwen3.5-122B-A10B": { + DownloadSource.DEFAULT: "Qwen/Qwen3.5-122B-A10B", + DownloadSource.MODELSCOPE: "Qwen/Qwen3.5-122B-A10B", + }, + "Qwen3.5-397B-A17B": { + DownloadSource.DEFAULT: "Qwen/Qwen3.5-397B-A17B", + DownloadSource.MODELSCOPE: "Qwen/Qwen3.5-397B-A17B", + }, + }, + template="qwen3_5", +) + + register_model_group( models={ "Qwen2-Audio-7B": { diff --git a/src/llamafactory/model/model_utils/moe.py b/src/llamafactory/model/model_utils/moe.py index fdff829dc..d89019865 100644 --- a/src/llamafactory/model/model_utils/moe.py +++ b/src/llamafactory/model/model_utils/moe.py @@ -147,6 +147,7 @@ def add_z3_leaf_module(model: "PreTrainedModel") -> None: _set_z3_leaf_modules(model, [Qwen3NextSparseMoeBlock]) + def configure_moe(config: "PretrainedConfig", model_args: "ModelArguments", is_trainable: bool) -> None: if not is_trainable or not model_args.moe_aux_loss_coef: return diff --git a/src/llamafactory/train/mca/workflow.py b/src/llamafactory/train/mca/workflow.py index 819b840ef..142e86526 100644 --- a/src/llamafactory/train/mca/workflow.py +++ b/src/llamafactory/train/mca/workflow.py @@ -110,6 +110,7 @@ def _freeze_model_parameters(model: Any, finetuning_args: "FinetuningArguments") if any(name.startswith(k) for k in params_to_freeze): p.requires_grad_(False) + def run_pt( model_args: "ModelArguments", data_args: "DataArguments", diff --git a/tests/data/test_collator.py b/tests/data/test_collator.py index 7222a8658..d62ee7a48 100644 --- a/tests/data/test_collator.py +++ b/tests/data/test_collator.py @@ -122,6 +122,8 @@ def test_multimodal_collator(): **tokenizer_module["processor"].image_processor(fake_image), } if not is_transformers_version_greater_than("5.0.0"): + # adapt position_ids and rope_deltas for transformers < 5.0.0 + # https://github.com/huggingface/transformers/pull/43972 expected_input["position_ids"] = [[[0, 1, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1]]] * 3 expected_input["rope_deltas"] = [[-8]]