[v1] add sft (#9752)

This commit is contained in:
Yaowei Zheng
2026-01-12 03:15:01 +08:00
committed by GitHub
parent 4d3621e3d3
commit 958b9c3468
29 changed files with 439 additions and 305 deletions

View File

@@ -19,7 +19,7 @@ from llamafactory.v1.core.model_engine import ModelEngine
def test_init_on_meta():
-    _, model_args, *_ = get_args(
+    model_args, *_ = get_args(
dict(
model="llamafactory/tiny-random-qwen3",
init_config={"name": "init_on_meta"},
@@ -30,7 +30,7 @@ def test_init_on_meta():
def test_init_on_rank0():
-    _, model_args, *_ = get_args(
+    model_args, *_ = get_args(
dict(
model="llamafactory/tiny-random-qwen3",
init_config={"name": "init_on_rank0"},
@@ -44,7 +44,7 @@ def test_init_on_rank0():
def test_init_on_default():
-    _, model_args, *_ = get_args(
+    model_args, *_ = get_args(
dict(
model="llamafactory/tiny-random-qwen3",
init_config={"name": "init_on_default"},

View File

@@ -43,7 +43,8 @@ def test_apply_kernel(mock_get_accelerator: MagicMock):
reload_kernels()
from llamafactory.v1.plugins.model_plugins.kernels.interface import apply_default_kernels
-    model = AutoModelForCausalLM.from_pretrained("llamafactory/tiny-random-qwen3")
+    # NOTE: use a special model to avoid contamination by other tests
+    model = AutoModelForCausalLM.from_pretrained("llamafactory/tiny-random-qwen2.5")
original_rmsnorm_forward = model.model.layers[0].input_layernorm.forward
original_swiglu_forward = model.model.layers[0].mlp.forward
model = apply_default_kernels(model=model, include_kernels="npu_fused_rmsnorm")
@@ -62,7 +63,8 @@ def test_apply_all_kernels(mock_get_accelerator: MagicMock):
reload_kernels()
from llamafactory.v1.plugins.model_plugins.kernels.interface import apply_default_kernels
-    model = AutoModelForCausalLM.from_pretrained("llamafactory/tiny-random-qwen3")
+    # NOTE: use a special model to avoid contamination by other tests
+    model = AutoModelForCausalLM.from_pretrained("llamafactory/tiny-random-qwen2.5")
original_rmsnorm_forward = model.model.layers[0].input_layernorm.forward
original_swiglu_forward = model.model.layers[0].mlp.forward