[model] update constants (#10220)

This commit is contained in:
Yaowei Zheng
2026-02-26 21:13:56 +08:00
committed by GitHub
parent 2b8b871475
commit 122cd46084
12 changed files with 69 additions and 40 deletions

View File

@@ -25,16 +25,16 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.10'
- name: Install dependencies
run: |
pip install -r docs/requirements.txt
- name: Build Sphinx
run: |
sphinx-build -b html docs/zh docs/_build/html/zh
@@ -56,10 +56,10 @@ jobs:
> docs/_build/html/index.html
touch docs/_build/html/.nojekyll
- name: Setup Pages
uses: actions/configure-pages@v5
- name: Upload artifact
uses: actions/upload-pages-artifact@v3
with:

View File

@@ -291,7 +291,7 @@ Read technical notes:
| [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - |
| [GPT-OSS](https://huggingface.co/openai) | 20B/120B | gpt_oss |
| [Granite 3-4](https://huggingface.co/ibm-granite) | 1B/2B/3B/7B/8B | granite3/granite4 |
| [Hunyuan/Hunyuan1.5 (MT)](https://huggingface.co/tencent/) | 0.5B/1.8B/4B/7B/13B | hunyuan/hunyuan_small |
| [Hunyuan/Hunyuan1.5 (MT)](https://huggingface.co/tencent/) | 0.5B/1.8B/4B/7B/13B | hunyuan/hunyuan_small|
| [InternLM 2-3](https://huggingface.co/internlm) | 7B/8B/20B | intern2 |
| [InternVL 2.5-3.5](https://huggingface.co/OpenGVLab) | 1B/2B/4B/8B/14B/30B/38B/78B/241B | intern_vl |
| [Intern-S1-mini](https://huggingface.co/internlm/) | 8B | intern_s1 |
@@ -319,6 +319,7 @@ Read technical notes:
| [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral |
| [Qwen2 (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
| [Qwen3 (MoE/Instruct/Thinking/Next)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/80B/235B | qwen3/qwen3_nothink |
| [Qwen3.5](https://huggingface.co/Qwen) | 27B/35B/122B/397B | qwen3_5 |
| [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio |
| [Qwen2.5-Omni](https://huggingface.co/Qwen) | 3B/7B | qwen2_omni |
| [Qwen3-Omni](https://huggingface.co/Qwen) | 30B | qwen3_omni |

View File

@@ -293,7 +293,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
| [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - |
| [GPT-OSS](https://huggingface.co/openai) | 20B/120B | gpt_oss |
| [Granite 3-4](https://huggingface.co/ibm-granite) | 1B/2B/3B/7B/8B | granite3/granite4 |
| [Hunyuan/Hunyuan1.5 (MT)](https://huggingface.co/tencent/) | 0.5B/1.8B/4B/7B/13B | hunyuan/hunyuan_small |
| [Hunyuan/Hunyuan1.5 (MT)](https://huggingface.co/tencent/) | 0.5B/1.8B/4B/7B/13B | hunyuan/hunyuan_small|
| [InternLM 2-3](https://huggingface.co/internlm) | 7B/8B/20B | intern2 |
| [InternVL 2.5-3.5](https://huggingface.co/OpenGVLab) | 1B/2B/4B/8B/14B/30B/38B/78B/241B | intern_vl |
| [Intern-S1-mini](https://huggingface.co/internlm/) | 8B | intern_s1 |
@@ -321,6 +321,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
| [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral |
| [Qwen2 (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
| [Qwen3 (MoE/Instruct/Thinking/Next)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/80B/235B | qwen3/qwen3_nothink |
| [Qwen3.5](https://huggingface.co/Qwen) | 27B/35B/122B/397B | qwen3_5 |
| [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio |
| [Qwen2.5-Omni](https://huggingface.co/Qwen) | 3B/7B | qwen2_omni |
| [Qwen3-Omni](https://huggingface.co/Qwen) | 30B | qwen3_omni |

View File

@@ -47,4 +47,3 @@
border-color: rgba(255, 255, 255, 0.45);
box-shadow: 0 0 0 3px rgba(255, 255, 255, 0.12);
}

View File

@@ -1,33 +1,31 @@
# Configuration file for the Sphinx documentation builder.
import os
import sys
# Define common settings here
project = 'LlamaFactory'
copyright = '2024, LlamaFactory Team'
author = 'LlamaFactory Team'
project = "LlamaFactory"
copyright = "2024, LlamaFactory Team"
author = "LlamaFactory Team"
extensions = [
'sphinx.ext.autodoc',
'sphinx.ext.viewcode',
'sphinx.ext.napoleon',
'myst_parser',
"sphinx.ext.autodoc",
"sphinx.ext.viewcode",
"sphinx.ext.napoleon",
"myst_parser",
]
templates_path = ['_templates']
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
templates_path = ["_templates"]
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
html_theme = 'sphinx_rtd_theme'
html_theme = "sphinx_rtd_theme"
html_static_path = ['_static']
html_static_path = ["_static"]
html_js_files = [
'js/switcher.js',
"js/switcher.js",
]
html_css_files = [
'css/lang-switcher.css',
"css/lang-switcher.css",
]
myst_enable_extensions = [

View File

@@ -1,20 +1,22 @@
import os
import sys
# Add parent dir to path to allow importing conf.py
sys.path.insert(0, os.path.abspath('..'))
from conf import *
# Add parent dir to path to allow importing conf.py
sys.path.insert(0, os.path.abspath(".."))
from conf import * # noqa: F403
# Language settings
language = 'en'
html_search_language = 'en'
language = "en"
html_search_language = "en"
# Static files
# Point to the root _static directory
html_static_path = ['../_static']
html_static_path = ["../_static"]
# Add custom JS for language switcher
html_js_files = [
'js/switcher.js',
"js/switcher.js",
]

View File

@@ -1,20 +1,22 @@
import os
import sys
# Add parent dir to path to allow importing conf.py
sys.path.insert(0, os.path.abspath('..'))
from conf import *
# Add parent dir to path to allow importing conf.py
sys.path.insert(0, os.path.abspath(".."))
from conf import * # noqa: F403
# Language settings
language = 'zh_CN'
html_search_language = 'zh'
language = "zh_CN"
html_search_language = "zh"
# Static files
# Point to the root _static directory
html_static_path = ['../_static']
html_static_path = ["../_static"]
# Add custom JS for language switcher
html_js_files = [
'js/switcher.js',
"js/switcher.js",
]

View File

@@ -6,14 +6,14 @@ template: qwen3_nothink
kernel_config:
name: auto
include_kernels: auto
include_kernels: auto
dist_config:
name: deepspeed
config_file: examples/deepspeed/ds_z3_config.json
### data
train_dataset: data/v1_sft_demo.yaml
train_dataset: data/v1_sft_demo.yaml
### training
output_dir: outputs/Qwen3-0.6B-deepspeed
@@ -22,4 +22,3 @@ cutoff_len: 2048
learning_rate: 1.0e-4
bf16: true
max_steps: 10

View File

@@ -2810,6 +2810,29 @@ register_model_group(
)
register_model_group(
models={
"Qwen3.5-27B": {
DownloadSource.DEFAULT: "Qwen/Qwen3.5-27B",
DownloadSource.MODELSCOPE: "Qwen/Qwen3.5-27B",
},
"Qwen3.5-35B-A3B": {
DownloadSource.DEFAULT: "Qwen/Qwen3.5-35B-A3B",
DownloadSource.MODELSCOPE: "Qwen/Qwen3.5-35B-A3B",
},
"Qwen3.5-122B-A10B": {
DownloadSource.DEFAULT: "Qwen/Qwen3.5-122B-A10B",
DownloadSource.MODELSCOPE: "Qwen/Qwen3.5-122B-A10B",
},
"Qwen3.5-397B-A17B": {
DownloadSource.DEFAULT: "Qwen/Qwen3.5-397B-A17B",
DownloadSource.MODELSCOPE: "Qwen/Qwen3.5-397B-A17B",
},
},
template="qwen3_5",
)
register_model_group(
models={
"Qwen2-Audio-7B": {

View File

@@ -147,6 +147,7 @@ def add_z3_leaf_module(model: "PreTrainedModel") -> None:
_set_z3_leaf_modules(model, [Qwen3NextSparseMoeBlock])
def configure_moe(config: "PretrainedConfig", model_args: "ModelArguments", is_trainable: bool) -> None:
if not is_trainable or not model_args.moe_aux_loss_coef:
return

View File

@@ -110,6 +110,7 @@ def _freeze_model_parameters(model: Any, finetuning_args: "FinetuningArguments")
if any(name.startswith(k) for k in params_to_freeze):
p.requires_grad_(False)
def run_pt(
model_args: "ModelArguments",
data_args: "DataArguments",

View File

@@ -122,6 +122,8 @@ def test_multimodal_collator():
**tokenizer_module["processor"].image_processor(fake_image),
}
if not is_transformers_version_greater_than("5.0.0"):
# adapt position_ids and rope_deltas for transformers < 5.0.0
# https://github.com/huggingface/transformers/pull/43972
expected_input["position_ids"] = [[[0, 1, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1]]] * 3
expected_input["rope_deltas"] = [[-8]]