[v1] Add DeepSpeed ZeRO-3 trigger for low-memory weight loading (#10300)

This commit is contained in:
jiaqiw09
2026-04-21 14:09:52 +08:00
committed by GitHub
parent f5d739b132
commit 28a6ea1cdc
13 changed files with 160 additions and 18 deletions

View File

@@ -19,5 +19,4 @@ output_dir: outputs/Qwen3-0.6B-deepspeed
micro_batch_size: 1
cutoff_len: 2048
learning_rate: 1.0e-4
bf16: true
max_steps: 10