# Mirror of https://github.com/hiyouga/LLaMA-Factory.git
# Synced 2026-02-26 07:45:59 +08:00
# 26 lines, 400 B, YAML
# Model selection.
# `model` looks like a hub-style "<org>/<name>" identifier - confirm how the
# consumer resolves it (hub download vs. local path).
model: Qwen/Qwen3-0.6B

# Model family/loader selector; the accepted values are defined by the
# consuming tool, not by this file.
model_class: llm

# Prompt template name. NOTE(review): the "_nothink" suffix presumably selects
# a variant without reasoning/thinking output - confirm against the template
# registry.
template: qwen3_nothink

# Kernel selection settings. The two keys below belong to this mapping: left
# at column 0 they would make `kernel_config` null and turn `name` into a
# top-level key that duplicates the `name` used for the distributed backend.
kernel_config:
  name: auto
  # NOTE(review): "auto" presumably lets the tool choose which kernels to
  # enable - confirm.
  include_kernels: auto

# Distributed-training backend settings. `name` and `config_file` are children
# of this mapping, not top-level keys.
dist_config:
  name: deepspeed
  # Path to the backend's JSON config. NOTE(review): the file name suggests a
  # DeepSpeed ZeRO stage-3 setup, and the relative path presumably resolves
  # from the repository root - confirm both.
  config_file: examples/deepspeed/ds_z3_config.json

### data
# Training dataset reference; the value is a path to another YAML file
# (presumably a dataset description - confirm the expected schema).
train_dataset: data/v1_sft_demo.yaml

### training
# Output directory for this run.
output_dir: outputs/Qwen3-0.6B-deepspeed
# NOTE(review): presumably the per-device batch size per step - confirm.
micro_batch_size: 1
# NOTE(review): presumably the maximum sequence length in tokens - confirm.
cutoff_len: 2048
# Written with a decimal point and a signed exponent so that YAML 1.1 parsers
# (e.g. PyYAML) resolve it as a float rather than a string.
learning_rate: 1.0e-4
bf16: true
# Small step count; this looks like a short demo/smoke run rather than a full
# training schedule.
max_steps: 10