mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2026-02-26 07:45:59 +08:00
[v1] support deepspeed (#10181)
This commit is contained in:
25
examples/v1/train_full/train_full_deepspeed.yaml
Normal file
25
examples/v1/train_full/train_full_deepspeed.yaml
Normal file
@@ -0,0 +1,25 @@
|
||||
model: Qwen/Qwen3-0.6B
|
||||
|
||||
model_class: llm
|
||||
|
||||
template: qwen3_nothink
|
||||
|
||||
kernel_config:
|
||||
name: auto
|
||||
include_kernels: auto
|
||||
|
||||
dist_config:
|
||||
name: deepspeed
|
||||
config_file: examples/deepspeed/ds_z3_config.json
|
||||
|
||||
### data
|
||||
train_dataset: data/v1_sft_demo.yaml
|
||||
|
||||
### training
|
||||
output_dir: outputs/Qwen3-0.6B-deepspeed
|
||||
micro_batch_size: 1
|
||||
cutoff_len: 2048
|
||||
learning_rate: 1.0e-4
|
||||
bf16: true
|
||||
max_steps: 10
|
||||
|
||||
Reference in New Issue
Block a user