[v1] add seed for training and fix gradient checkpointing (#10211)

This commit is contained in:
jiaqiw09
2026-02-28 18:16:06 +08:00
committed by GitHub
parent 816480012f
commit 45d335c709
7 changed files with 38 additions and 12 deletions

View File

@@ -14,16 +14,12 @@ dist_config:
name: fsdp2
dcp_path: null # /mnt/f/pretrain_models/Qwen3-0.6B-dcp
# Initialization settings. `name` must stay nested under init_config:
# at column 0 it would leave init_config null and become a separate
# top-level `name` key.
init_config:
  name: init_on_meta
### data
# NOTE(review): presumably a dataset description file, given as a relative
# path; what it is resolved against is not shown here - confirm.
train_dataset: data/v1_sft_demo.yaml
### training
# NOTE(review): presumably the directory the run writes its outputs to - confirm.
output_dir: outputs/test_fsdp2
micro_batch_size: 1
global_batch_size: 1
# NOTE(review): presumably the maximum sequence length - confirm the unit.
cutoff_len: 2048
# Keep the decimal point and the signed exponent: YAML 1.1 loaders such as
# PyYAML resolve `1.0e-4` as a float but read `1e-4` as a string.
learning_rate: 1.0e-4
bf16: false