mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2026-03-03 02:05:59 +08:00
[v1] add seed for training and fix gradient checkpointing (#10211)
This commit is contained in:
@@ -14,16 +14,12 @@ dist_config:
|
||||
name: fsdp2
|
||||
dcp_path: null # /mnt/f/pretrain_models/Qwen3-0.6B-dcp
|
||||
|
||||
init_config:
|
||||
name: init_on_meta
|
||||
|
||||
### data
|
||||
train_dataset: data/v1_sft_demo.yaml
|
||||
|
||||
### training
|
||||
output_dir: outputs/test_fsdp2
|
||||
micro_batch_size: 1
|
||||
global_batch_size: 1
|
||||
cutoff_len: 2048
|
||||
learning_rate: 1.0e-4
|
||||
bf16: false
|
||||
|
||||
Reference in New Issue
Block a user