mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2026-01-09 23:50:36 +08:00
[v1] Refactor kernel plugin (#9669)
Co-authored-by: frozenleaves <frozen@Mac.local>
This commit is contained in:
71
tests_v1/config/test_args_parser.py
Normal file
71
tests_v1/config/test_args_parser.py
Normal file
@@ -0,0 +1,71 @@
|
||||
# Copyright 2025 the LlamaFactory team.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import sys
|
||||
import pathlib
|
||||
from unittest.mock import patch
|
||||
|
||||
from llamafactory.v1.config.arg_parser import get_args
|
||||
|
||||
|
||||
def test_get_args_from_yaml(tmp_path: pathlib.Path):
    """Check that ``get_args`` reads a YAML config file named on the command line.

    A config file is written under ``tmp_path``, ``sys.argv`` is patched so the
    file path is the sole CLI argument, and the parsed argument groups are
    compared against the values written to the file.

    Args:
        tmp_path: pytest-provided temporary directory for the config file.
    """
    # ``kernel_config`` and ``peft_config`` are nested mappings: their children
    # must be indented deeper than the parent key or YAML treats them as
    # top-level keys and the attribute checks below cannot hold.
    config_yaml = """
        ### model
        model: "llamafactory/tiny-random-qwen2.5"
        trust_remote_code: true
        use_fast_processor: true
        model_class: "llm"
        kernel_config:
            name: "auto"
            include_kernels: "auto" # choice: null/true/false/auto/kernel_id1,kernel_id2,kernel_id3, default is null
        peft_config:
            name: "lora"
            lora_rank: 0.8
        quant_config: null

        ### data
        dataset: "llamafactory/tiny-supervised-dataset"
        cutoff_len: 2048

        ### training
        output_dir: "outputs/test_run"
        micro_batch_size: 1
        global_batch_size: 1
        learning_rate: 1.0e-4
        bf16: false
        dist_config: null

        ### sample
        sample_backend: "hf"
        max_new_tokens: 128
    """

    config_file = tmp_path / "config.yaml"
    config_file.write_text(config_yaml, encoding="utf-8")

    # argv[0] is the program name; argv[1] is the config path handed to the parser.
    test_argv = ["test_args_parser.py", str(config_file)]

    # Only the parse itself needs the patched argv; the returned objects are
    # inspected after the original argv has been restored.
    with patch.object(sys, "argv", test_argv):
        data_args, model_args, training_args, sample_args = get_args()

    # Training options.
    assert training_args.output_dir == "outputs/test_run"
    assert training_args.micro_batch_size == 1
    assert training_args.global_batch_size == 1
    assert training_args.learning_rate == 1.0e-4
    assert training_args.bf16 is False
    assert training_args.dist_config is None

    # Model options, including the nested plugin configs.
    assert model_args.model == "llamafactory/tiny-random-qwen2.5"
    assert model_args.kernel_config.name == "auto"
    assert model_args.kernel_config.get("include_kernels") == "auto"
    assert model_args.peft_config.name == "lora"
    # NOTE(review): the value is expected back exactly as written in the YAML
    # (0.8); presumably plugin config entries are not type-validated - confirm.
    assert model_args.peft_config.get("lora_rank") == 0.8
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
import torch
|
||||
|
||||
from llamafactory.v1.config.model_args import ModelArguments
|
||||
from llamafactory.v1.config.model_args import ModelArguments, PluginConfig
|
||||
from llamafactory.v1.core.model_loader import ModelLoader
|
||||
|
||||
|
||||
@@ -29,5 +29,23 @@ def test_tiny_qwen():
|
||||
assert model_loader.model.dtype == torch.bfloat16
|
||||
|
||||
|
||||
def test_tiny_qwen_with_kernel_plugin():
    """Load the tiny Qwen2.5 model with the ``auto`` kernel plugin enabled.

    The first decoder layer's input layernorm ``forward`` is compared by code
    object against ``npu_rms_norm_forward``: it must match when ``torch``
    exposes an ``npu`` attribute and must differ otherwise. In both cases the
    loaded model must be a ``Qwen2ForCausalLM``.
    """
    from transformers import Qwen2ForCausalLM

    from llamafactory.v1.plugins.model_plugins.kernels.ops.rms_norm.npu_rms_norm import npu_rms_norm_forward

    model_args = ModelArguments(
        model="llamafactory/tiny-random-qwen2.5", kernel_config=PluginConfig(name="auto", include_kernels="auto")
    )
    model_loader = ModelLoader(model_args)

    # Compare code objects rather than the callables themselves: the layer
    # exposes a bound method, so the function objects are never identical.
    layernorm_code = model_loader.model.model.layers[0].input_layernorm.forward.__code__

    # test enable apply kernel plugin
    if hasattr(torch, "npu"):
        assert layernorm_code == npu_rms_norm_forward.__code__
    else:
        assert layernorm_code != npu_rms_norm_forward.__code__

    assert isinstance(model_loader.model, Qwen2ForCausalLM)
|
||||
|
||||
|
||||
# Allow running this test module directly as a script, without pytest.
if __name__ == "__main__":
    test_tiny_qwen()
    test_tiny_qwen_with_kernel_plugin()
|
||||
|
||||
@@ -12,16 +12,13 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import sys
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from transformers import AutoModelForCausalLM
|
||||
|
||||
from llamafactory.v1.accelerator.helper import get_current_accelerator
|
||||
from llamafactory.v1.plugins.model_plugins.kernels.mlp import npu_swiglu
|
||||
from llamafactory.v1.plugins.model_plugins.kernels.registry import apply_available_kernels, apply_kernel
|
||||
from llamafactory.v1.plugins.model_plugins.kernels.rms_norm import npu_rms_norm
|
||||
from llamafactory.v1.plugins.model_plugins.kernels.rope import npu_rope
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
@@ -29,24 +26,29 @@ def clear_accelerator_cache():
|
||||
get_current_accelerator.cache_clear()
|
||||
|
||||
|
||||
def reload_kernels():
    """Helper to reload kernel modules to respect mocked accelerator.

    Removes the kernels package and every one of its submodules from
    ``sys.modules`` so that the next import of any of them executes the module
    code again instead of returning the cached module object.
    """
    package = "llamafactory.v1.plugins.model_plugins.kernels"

    # Unload kernel interface and registry.
    # Iterate over a snapshot because entries are deleted during the walk, and
    # match on the package boundary so that an unrelated package that merely
    # shares the textual prefix is left untouched.
    for name in list(sys.modules):
        if name == package or name.startswith(package + "."):
            del sys.modules[name]
|
||||
|
||||
|
||||
@patch("torch.accelerator.current_accelerator")
|
||||
def test_apply_kernel(mock_get_accelerator: MagicMock):
|
||||
mock_device = MagicMock()
|
||||
setattr(mock_device, "type", "npu")
|
||||
mock_get_accelerator.return_value = mock_device
|
||||
# Force reload of kernels with mocked accelerator
|
||||
reload_kernels()
|
||||
from llamafactory.v1.plugins.model_plugins.kernels.interface import apply_default_kernels
|
||||
|
||||
model = AutoModelForCausalLM.from_pretrained("llamafactory/tiny-random-qwen2.5")
|
||||
|
||||
original_rmsnorm_forward = model.model.layers[0].input_layernorm.forward
|
||||
original_swiglu_forward = model.model.layers[0].mlp.forward
|
||||
|
||||
apply_kernel(model, npu_rope.NpuRoPEKernel)
|
||||
|
||||
model = apply_kernel(model, npu_rms_norm.NpuRMSNormKernel)
|
||||
assert model.model.layers[0].input_layernorm is not original_rmsnorm_forward
|
||||
|
||||
model = apply_kernel(model, npu_swiglu.NpuSwiGluKernel)
|
||||
assert model.model.layers[0].mlp.forward is not original_swiglu_forward
|
||||
model = apply_default_kernels(model=model, include_kernels="npu_fused_rmsnorm")
|
||||
assert model.model.layers[0].input_layernorm.forward.__func__ is not original_rmsnorm_forward.__func__
|
||||
assert model.model.layers[0].mlp.forward.__func__ is original_swiglu_forward.__func__
|
||||
|
||||
|
||||
@patch("torch.accelerator.current_accelerator")
|
||||
@@ -56,12 +58,15 @@ def test_apply_all_kernels(mock_get_accelerator: MagicMock):
|
||||
setattr(mock_device, "type", "npu")
|
||||
mock_get_accelerator.return_value = mock_device
|
||||
|
||||
# Force reload of kernels with mocked accelerator
|
||||
reload_kernels()
|
||||
from llamafactory.v1.plugins.model_plugins.kernels.interface import apply_default_kernels
|
||||
|
||||
model = AutoModelForCausalLM.from_pretrained("llamafactory/tiny-random-qwen2.5")
|
||||
|
||||
original_rmsnorm_forward = model.model.layers[0].input_layernorm.forward
|
||||
original_swiglu_forward = model.model.layers[0].mlp.forward
|
||||
|
||||
model = apply_available_kernels(model)
|
||||
|
||||
assert model.model.layers[0].input_layernorm is not original_rmsnorm_forward
|
||||
assert model.model.layers[0].mlp.forward is not original_swiglu_forward
|
||||
model = apply_default_kernels(model=model, include_kernels=True)
|
||||
assert model.model.layers[0].input_layernorm.forward.__func__ is not original_rmsnorm_forward.__func__
|
||||
assert model.model.layers[0].mlp.forward.__func__ is not original_swiglu_forward.__func__
|
||||
|
||||
Reference in New Issue
Block a user