[release] Bye 2025 (#9702)

2026-02-26 15:56:00 +08:00 · 2025-12-31 22:22:40 +08:00
parent 000526908a
commit 95ac3f2373
59 changed files with 154 additions and 401 deletions
--- a/examples/train_qlora/llama3_lora_sft_aqlm.yaml
+++ b/examples/train_qlora/llama3_lora_sft_aqlm.yaml
@@ -14,7 +14,6 @@ dataset: identity,alpaca_en_demo
 template: llama3
 cutoff_len: 2048
 max_samples: 1000
-overwrite_cache: true
 preprocessing_num_workers: 16
 dataloader_num_workers: 4

--- a/examples/train_qlora/llama3_lora_sft_awq.yaml
+++ b/examples/train_qlora/llama3_lora_sft_awq.yaml
@@ -14,7 +14,6 @@ dataset: identity,alpaca_en_demo
 template: llama3
 cutoff_len: 2048
 max_samples: 1000
-overwrite_cache: true
 preprocessing_num_workers: 16
 dataloader_num_workers: 4

--- a/examples/train_qlora/llama3_lora_sft_gptq.yaml
+++ b/examples/train_qlora/llama3_lora_sft_gptq.yaml
@@ -14,7 +14,6 @@ dataset: identity,alpaca_en_demo
 template: llama3
 cutoff_len: 2048
 max_samples: 1000
-overwrite_cache: true
 preprocessing_num_workers: 16
 dataloader_num_workers: 4

--- a/examples/train_qlora/llama3_lora_sft_bnb_npu.yaml
+++ b/examples/train_qlora/llama3_lora_sft_bnb_npu.yaml
@@ -1,5 +1,5 @@
 ### model
-model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+model_name_or_path: Qwen/Qwen3-4B-Instruct-2507
 quantization_bit: 4
 quantization_method: bnb
 double_quantization: false
@@ -14,15 +14,14 @@ lora_target: all

 ### dataset
 dataset: identity,alpaca_en_demo
-template: llama3
+template: qwen3_nothink
 cutoff_len: 2048
 max_samples: 1000
-overwrite_cache: true
 preprocessing_num_workers: 16
 dataloader_num_workers: 4

 ### output
-output_dir: saves/llama3-8b/lora/sft
+output_dir: saves/qwen3-4b/lora/sft
 logging_steps: 10
 save_steps: 500
 plot_loss: true
--- a/examples/train_qlora/llama3_lora_sft_otfq.yaml
+++ b/examples/train_qlora/llama3_lora_sft_otfq.yaml
@@ -1,5 +1,5 @@
 ### model
-model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+model_name_or_path: Qwen/Qwen3-4B-Instruct-2507
 quantization_bit: 4  # choices: [8 (bnb/hqq/eetq), 4 (bnb/hqq), 3 (hqq), 2 (hqq)]
 quantization_method: bnb  # choices: [bnb, hqq, eetq]
 trust_remote_code: true
@@ -13,15 +13,14 @@ lora_target: all

 ### dataset
 dataset: identity,alpaca_en_demo
-template: llama3
+template: qwen3_nothink
 cutoff_len: 2048
 max_samples: 1000
-overwrite_cache: true
 preprocessing_num_workers: 16
 dataloader_num_workers: 4

 ### output
-output_dir: saves/llama3-8b/lora/sft
+output_dir: saves/qwen3-4b/lora/sft
 logging_steps: 10
 save_steps: 500
 plot_loss: true