From e8deda53a15eb016525dfeed4485ae41cb6ac1ed Mon Sep 17 00:00:00 2001
From: xvxuopop <127376094+xvxuopop@users.noreply.github.com>
Date: Thu, 18 Dec 2025 21:27:00 +0800
Subject: [PATCH] [example] add Qwen3 series examples (#9624)

Co-authored-by: UsernameFull
---
Reviewer notes (text in this area is discarded by `git am`):
  * Adds three new example configs under examples/ascend/; every one sets
    `stage: sft` with `finetuning_type: full`.
  * Each file's header comment shows the `accelerate launch` command and the
    accelerate config (fsdp_config.yaml or fsdp2_config.yaml) it is meant
    to be started with.
  * Hunk sizes are 45 + 46 + 48 = 139 added lines; blank lines inside the
    configs are carried as bare `+` lines and count towards those totals.

 examples/ascend/qwen3_full_sft_fsdp2.yaml   | 45 +++++++++++++++++
 examples/ascend/qwen3moe_full_sft_fsdp.yaml | 46 ++++++++++++++++++
 .../ascend/qwen3vlmoe_full_sft_fsdp2.yaml   | 48 +++++++++++++++++++
 3 files changed, 139 insertions(+)
 create mode 100644 examples/ascend/qwen3_full_sft_fsdp2.yaml
 create mode 100644 examples/ascend/qwen3moe_full_sft_fsdp.yaml
 create mode 100644 examples/ascend/qwen3vlmoe_full_sft_fsdp2.yaml

diff --git a/examples/ascend/qwen3_full_sft_fsdp2.yaml b/examples/ascend/qwen3_full_sft_fsdp2.yaml
new file mode 100644
index 000000000..04351ddd6
--- /dev/null
+++ b/examples/ascend/qwen3_full_sft_fsdp2.yaml
@@ -0,0 +1,45 @@
+# Start FSDP2 fine-tuning
+# accelerate launch \
+#   --config_file examples/accelerate/fsdp2_config.yaml \
+#   src/train.py examples/ascend/qwen3_full_sft_fsdp2.yaml
+# Change `num_processes` in fsdp2_config.yaml to 16 in A3
+
+### model
+model_name_or_path: Qwen/Qwen3-8B
+trust_remote_code: true
+use_v1_kernels: true
+flash_attn: fa2
+
+### method
+stage: sft
+do_train: true
+finetuning_type: full
+
+### dataset
+dataset: alpaca_en_demo
+template: qwen3
+cutoff_len: 2048
+max_samples: 1000
+overwrite_cache: true
+preprocessing_num_workers: 16
+dataloader_num_workers: 4
+
+### output
+output_dir: saves/Qwen3-8B/full/sft
+logging_steps: 1
+save_steps: 500
+max_steps: 500
+plot_loss: true
+overwrite_output_dir: true
+save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
+
+### train
+per_device_train_batch_size: 8
+gradient_accumulation_steps: 1
+learning_rate: 1.0e-5
+lr_scheduler_type: cosine
+warmup_ratio: 0.1
+bf16: true
+ddp_timeout: 1800
+resume_from_checkpoint: null
diff --git a/examples/ascend/qwen3moe_full_sft_fsdp.yaml b/examples/ascend/qwen3moe_full_sft_fsdp.yaml
new file mode 100644
index 000000000..659918b10
--- /dev/null
+++ b/examples/ascend/qwen3moe_full_sft_fsdp.yaml
@@ -0,0 +1,46 @@
+# Start FSDP fine-tuning
+# accelerate launch \
+#   --config_file examples/accelerate/fsdp_config.yaml \
+#   src/train.py examples/ascend/qwen3moe_full_sft_fsdp.yaml
+# Change `num_processes` in fsdp_config.yaml to 16 in A3
+
+### model
+model_name_or_path: Qwen/Qwen3-30B-A3B-Instruct-2507
+trust_remote_code: true
+use_v1_kernels: true
+flash_attn: fa2
+
+### method
+stage: sft
+do_train: true
+finetuning_type: full
+disable_gradient_checkpointing: false
+
+### dataset
+dataset: alpaca_zh
+template: qwen3
+cutoff_len: 1024
+overwrite_cache: true
+preprocessing_num_workers: 16
+dataloader_num_workers: 4
+
+### output
+output_dir: saves/Qwen3-30B-A3B-Instruct-2507/full/sft
+logging_steps: 1
+save_steps: 500
+max_steps: 500
+plot_loss: true
+overwrite_output_dir: true
+save_only_model: true
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
+
+### train
+per_device_train_batch_size: 4
+gradient_accumulation_steps: 1
+learning_rate: 1.0e-4
+lr_scheduler_type: cosine
+warmup_ratio: 0.1
+bf16: true
+ddp_timeout: 180000000
+resume_from_checkpoint: null
+seed: 1234
diff --git a/examples/ascend/qwen3vlmoe_full_sft_fsdp2.yaml b/examples/ascend/qwen3vlmoe_full_sft_fsdp2.yaml
new file mode 100644
index 000000000..d841804d6
--- /dev/null
+++ b/examples/ascend/qwen3vlmoe_full_sft_fsdp2.yaml
@@ -0,0 +1,48 @@
+# Start FSDP2 fine-tuning
+# accelerate launch \
+#   --config_file examples/accelerate/fsdp2_config.yaml \
+#   src/train.py examples/ascend/qwen3vlmoe_full_sft_fsdp2.yaml
+# Change `num_processes` in fsdp2_config.yaml to 16 in A3
+
+### model
+model_name_or_path: Qwen/Qwen3-VL-30B-A3B-Instruct
+image_max_pixels: 262144
+video_max_pixels: 16384
+trust_remote_code: true
+use_v1_kernels: true
+flash_attn: fa2
+
+### method
+stage: sft
+do_train: true
+finetuning_type: full
+disable_gradient_checkpointing: false
+
+### dataset
+dataset: llava_1k_en, llava_1k_zh
+template: qwen3_vl
+cutoff_len: 1024
+overwrite_cache: true
+preprocessing_num_workers: 16
+dataloader_num_workers: 4
+
+### output
+output_dir: saves/Qwen3-VL-30B-A3B-Instruct/full/sft
+logging_steps: 1
+save_steps: 500
+max_steps: 500
+plot_loss: true
+overwrite_output_dir: true
+save_only_model: true
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
+
+### train
+per_device_train_batch_size: 2
+gradient_accumulation_steps: 1
+learning_rate: 1.0e-4
+lr_scheduler_type: cosine
+warmup_ratio: 0.1
+bf16: true
+ddp_timeout: 180000000
+resume_from_checkpoint: null
+seed: 1234