# Training configuration: Qwen3-0.6B with the DeepSpeed distributed backend.
# NOTE(review): the block structure below was reconstructed from a file whose
# line breaks had been lost. Confirm that `include_kernels` belongs under
# `kernel_config` and `config_file` under `dist_config`.

### model
model: Qwen/Qwen3-0.6B
model_class: llm

template: qwen3_nothink

kernel_config:
  name: auto
  include_kernels: auto

dist_config:
  name: deepspeed
  # Path to the DeepSpeed JSON config; the file name suggests ZeRO stage 3.
  config_file: examples/deepspeed/ds_z3_config.json

### data
train_dataset: data/v1_sft_demo.yaml

### training
output_dir: outputs/Qwen3-0.6B-deepspeed
micro_batch_size: 1
cutoff_len: 2048
# Keep the decimal point and signed exponent: YAML 1.1 loaders (e.g. PyYAML)
# only resolve this form as a float; `1e-4` would load as a string.
learning_rate: 1.0e-4
bf16: true
# NOTE(review): 10 steps is presumably a smoke-test/demo value — confirm
# before using this file for a real run.
max_steps: 10