[v1] support deepspeed (#10181)

This commit is contained in:
浮梦
2026-02-12 17:24:30 +08:00
committed by GitHub
parent 675ce8cc7f
commit 5c52afa30d
5 changed files with 257 additions and 47 deletions

View File

@@ -0,0 +1,25 @@
# Example training configuration that selects the `deepspeed` distributed
# backend. Nested sections are indented by two spaces; the keys under
# `kernel_config` and `dist_config` must stay indented, otherwise both sections
# become null and the two `name` keys collide at the top level.

### model
model: Qwen/Qwen3-0.6B
model_class: llm

template: qwen3_nothink

### kernels
kernel_config:
  name: auto
  include_kernels: auto

### distributed
dist_config:
  name: deepspeed
  # Path to the DeepSpeed JSON config.
  # NOTE(review): "z3" presumably means ZeRO stage 3 - confirm against the file.
  config_file: examples/deepspeed/ds_z3_config.json

### data
train_dataset: data/v1_sft_demo.yaml

### training
output_dir: outputs/Qwen3-0.6B-deepspeed
micro_batch_size: 1
cutoff_len: 2048
# Written with a decimal point and a signed exponent so that YAML 1.1 loaders
# also resolve it as a float rather than a string.
learning_rate: 1.0e-4
bf16: true
# Small step count; presumably this is a short demo run - TODO confirm.
max_steps: 10