[v1] add deepspeed zero3 trigger for low memory usage weight loading (#10300)

This commit is contained in:
jiaqiw09
2026-04-21 14:09:52 +08:00
committed by GitHub
parent f5d739b132
commit 28a6ea1cdc
13 changed files with 160 additions and 18 deletions

View File

@@ -29,7 +29,6 @@ micro_batch_size: 1
global_batch_size: 4
cutoff_len: 2048
learning_rate: 2.0e-5
bf16: false
max_steps: 10
### sample

View File

@@ -19,5 +19,4 @@ output_dir: outputs/Qwen3-0.6B-deepspeed
micro_batch_size: 1
cutoff_len: 2048
learning_rate: 1.0e-4
bf16: true
max_steps: 10

View File

@@ -21,7 +21,6 @@ output_dir: outputs/test_fsdp2
micro_batch_size: 1
cutoff_len: 2048
learning_rate: 1.0e-4
bf16: false
max_steps: 10
### sample

View File

@@ -29,7 +29,6 @@ output_dir: ./outputs/test_lora
micro_batch_size: 1
cutoff_len: 2048
learning_rate: 1.0e-4
bf16: true
max_steps: 10
### sample

View File

@@ -34,7 +34,6 @@ output_dir: outputs/test_quantization
micro_batch_size: 1
cutoff_len: 2048
learning_rate: 1.0e-4
bf16: false
max_steps: 10
### sample