[release] Bye 2025 (#9702)

2026-07-28 19:56:13 +08:00 · 2025-12-31 22:22:40 +08:00
parent 000526908a
commit 95ac3f2373
59 changed files with 154 additions and 401 deletions
--- a/examples/train_full/llama3_full_sft.yaml
+++ b/examples/train_full/llama3_full_sft.yaml
@@ -1,5 +1,5 @@
 ### model
-model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+model_name_or_path: Qwen/Qwen3-4B-Instruct-2507
 trust_remote_code: true

 ### method
@@ -10,15 +10,14 @@ deepspeed: examples/deepspeed/ds_z3_config.json  # choices: [ds_z0_config.json,

 ### dataset
 dataset: identity,alpaca_en_demo
-template: llama3
+template: qwen3_nothink
 cutoff_len: 2048
 max_samples: 1000
-overwrite_cache: true
 preprocessing_num_workers: 16
 dataloader_num_workers: 4

 ### output
-output_dir: saves/llama3-8b/full/sft
+output_dir: saves/qwen3-4b/full/sft
 logging_steps: 10
 save_steps: 500
 plot_loss: true
--- a/examples/train_full/qwen3_full_sft_autotp.yaml
+++ b/examples/train_full/qwen3_full_sft_autotp.yaml
@@ -1,46 +0,0 @@
-### model
-model_name_or_path: Qwen/Qwen3-32B
-trust_remote_code: true
-use_v1_kernels: true
-
-### method
-stage: sft
-do_train: true
-finetuning_type: full
-deepspeed: examples/deepspeed/ds_z2_autotp_config.json
-
-### dataset
-dataset: identity,alpaca_en_demo
-template: qwen3
-cutoff_len: 2048
-max_samples: 1000
-overwrite_cache: true
-preprocessing_num_workers: 16
-dataloader_num_workers: 4
-
-### output
-output_dir: saves/qwen3-32b/full/sft_autotp
-logging_steps: 1
-save_steps: 500
-plot_loss: true
-overwrite_output_dir: true
-save_only_model: false
-report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
-
-### train
-per_device_train_batch_size: 4
-gradient_accumulation_steps: 1
-learning_rate: 1.0e-4
-num_train_epochs: 3.0
-lr_scheduler_type: cosine
-warmup_ratio: 0.1
-bf16: true
-ddp_timeout: 180000000
-resume_from_checkpoint: null
-
-### eval
-# eval_dataset: alpaca_en_demo
-# val_size: 0.1
-# per_device_eval_batch_size: 1
-# eval_strategy: steps
-# eval_steps: 500
--- a/examples/train_full/qwen2_5vl_full_sft.yaml
+++ b/examples/train_full/qwen2_5vl_full_sft.yaml
@@ -1,5 +1,5 @@
 ### model
-model_name_or_path: Qwen/Qwen2.5-VL-7B-Instruct
+model_name_or_path: Qwen/Qwen3-VL-4B-Instruct
 image_max_pixels: 262144
 video_max_pixels: 16384
 trust_remote_code: true
@@ -15,15 +15,14 @@ deepspeed: examples/deepspeed/ds_z3_config.json

 ### dataset
 dataset: mllm_demo,identity,alpaca_en_demo
-template: qwen2_vl
+template: qwen3_vl_nothink
 cutoff_len: 2048
 max_samples: 1000
-overwrite_cache: true
 preprocessing_num_workers: 16
 dataloader_num_workers: 4

 ### output
-output_dir: saves/qwen2_5vl-7b/full/sft
+output_dir: saves/qwen3-vl-4b/full/sft
 logging_steps: 10
 save_steps: 500
 plot_loss: true